From 70d4102f48dce2d5755e9139a15eeec606f97bff Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 9 May 2022 15:36:49 +0300 Subject: [PATCH 001/395] Fix compiling edata.h with MSVC At the time an attempt to compile jemalloc 5.3.0 with MSVC 2019 results in the following error message: > jemalloc/include/jemalloc/internal/edata.h:660: error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax --- include/jemalloc/internal/edata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index af039ea7..e77a55e6 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -656,8 +656,10 @@ edata_ead_comp(const edata_t *a, const edata_t *b) { static inline edata_cmp_summary_t edata_cmp_summary_get(const edata_t *edata) { - return (edata_cmp_summary_t){edata_sn_get(edata), - (uintptr_t)edata_addr_get(edata)}; + edata_cmp_summary_t result; + result.sn = edata_sn_get(edata); + result.addr = (uintptr_t)edata_addr_get(edata); + return result; } static inline int From cd5aaf308a46ce8ad0232ee9efb697b4ed33a7e4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 May 2022 13:11:44 -0700 Subject: [PATCH 002/395] Improve the failure message upon opt_experimental_infallible_new. --- src/jemalloc_cpp.cpp | 10 ++++++++-- test/integration/cpp/infallible_new_true.cpp | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 451655f1..8b53a392 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -57,8 +57,14 @@ JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { if (opt_experimental_infallible_new) { - safety_check_fail(": Allocation failed and " - "opt.experimental_infallible_new is true. Aborting.\n"); + const char *huge_warning = (size >= ((std::size_t)1 << 30)) ? 
+ "This may be caused by heap corruption, if the large size " + "is unexpected (suggest building with sanitizers for " + "debugging)." : ""; + + safety_check_fail(": Allocation of size %zu failed. " + "%s opt.experimental_infallible_new is true. Aborting.\n", + size, huge_warning); return nullptr; } diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index d6754128..3976f08b 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -9,8 +9,8 @@ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; void fake_abort(const char *message) { - if (strcmp(message, ": Allocation failed and " - "opt.experimental_infallible_new is true. Aborting.\n") != 0) { + const char *expected_start = ": Allocation of size"; + if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); } fake_abort_called = true; From 5b1f2cc5d79672e0d8852da1b705d68a74d22cd4 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 19 Apr 2022 19:51:27 -0700 Subject: [PATCH 003/395] Implement pvalloc replacement Despite being an obsolete function, pvalloc is still present in GLIBC and should work correctly when jemalloc replaces libc allocator. 
--- configure.ac | 6 +++ include/jemalloc/internal/hook.h | 1 + .../internal/jemalloc_internal_defs.h.in | 1 + include/jemalloc/jemalloc_defs.h.in | 1 + include/jemalloc/jemalloc_protos.h.in | 6 +++ src/jemalloc.c | 46 +++++++++++++++++++ test/unit/hook.c | 14 ++++++ 7 files changed, 75 insertions(+) diff --git a/configure.ac b/configure.ac index f6d25f33..8248f52d 100644 --- a/configure.ac +++ b/configure.ac @@ -1064,6 +1064,9 @@ AC_CHECK_FUNC([memalign], AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ]) public_syms="${public_syms} valloc"]) +AC_CHECK_FUNC([pvalloc], + [AC_DEFINE([JEMALLOC_OVERRIDE_PVALLOC], [ ], [ ]) + public_syms="${public_syms} pvalloc"]) AC_CHECK_FUNC([malloc_size], [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ]) public_syms="${public_syms} malloc_size"]) @@ -1089,6 +1092,9 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_valloc], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_valloc"]) + AC_CHECK_FUNC([__libc_pvalloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_PVALLOC], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_pvalloc"]) AC_CHECK_FUNC([__posix_memalign], [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __posix_memalign"]) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index ee246b1e..af03d2f5 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -55,6 +55,7 @@ enum hook_alloc_e { hook_alloc_calloc, hook_alloc_memalign, hook_alloc_valloc, + hook_alloc_pvalloc, hook_alloc_mallocx, /* The reallocating functions have both alloc and dalloc variants */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3588072f..888ef470 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -18,6 +18,7 @@ #undef 
JEMALLOC_OVERRIDE___LIBC_MEMALIGN #undef JEMALLOC_OVERRIDE___LIBC_REALLOC #undef JEMALLOC_OVERRIDE___LIBC_VALLOC +#undef JEMALLOC_OVERRIDE___LIBC_PVALLOC #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN /* diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index cbe2fca6..77d9d3b5 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -25,6 +25,7 @@ */ #undef JEMALLOC_OVERRIDE_MEMALIGN #undef JEMALLOC_OVERRIDE_VALLOC +#undef JEMALLOC_OVERRIDE_PVALLOC /* * At least Linux omits the "const" in: diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 356221cc..3f9fc848 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -69,3 +69,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif + +#ifdef JEMALLOC_OVERRIDE_PVALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_SYS_NOTHROW *@je_@pvalloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 7655de4e..68db1f36 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3250,6 +3250,49 @@ je_valloc(size_t size) { } #endif +#ifdef JEMALLOC_OVERRIDE_PVALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) +je_pvalloc(size_t size) { + void *ret; + + static_opts_t sopts; + dynamic_opts_t dopts; + + LOG("core.pvalloc.entry", "size: %zu\n", size); + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.null_out_result_on_error = true; + sopts.min_alignment = PAGE; + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + + dopts.result = &ret; + dopts.num_items = 1; + /* + * This is the only 
difference from je_valloc - size is rounded up to + * a PAGE multiple. + */ + dopts.item_size = PAGE_CEILING(size); + dopts.alignment = PAGE; + + imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_pvalloc, ret, (uintptr_t)ret, + args); + } + + LOG("core.pvalloc.exit", "result: %p\n", ret); + return ret; +} +#endif + #if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible @@ -3297,6 +3340,9 @@ void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); # ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC void *__libc_valloc(size_t size) PREALIAS(je_valloc); # endif +# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC +void *__libc_pvalloc(size_t size) PREALIAS(je_pvalloc); +# endif # ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); # endif diff --git a/test/unit/hook.c b/test/unit/hook.c index 16a6f1b0..36dbd269 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -313,6 +313,20 @@ TEST_BEGIN(test_hooks_alloc_simple) { free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ + /* pvalloc */ +#ifdef JEMALLOC_OVERRIDE_PVALLOC + reset(); + ptr = pvalloc(1); + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_pvalloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + free(ptr); +#endif /* JEMALLOC_OVERRIDE_PVALLOC */ + /* mallocx */ reset(); ptr = mallocx(1, MALLOCX_LG_ALIGN(10)); From 70e3735f3a71d3e05faa05c58ff3ca82ebaad908 Mon Sep 17 00:00:00 2001 From: barracuda156 Date: Sat, 21 May 2022 23:36:15 +0800 Subject: [PATCH 004/395] jemalloc: fix PowerPC definitions in quantum.h --- 
include/jemalloc/internal/quantum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index c22d753a..a97f54ca 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -49,7 +49,7 @@ # ifdef __or1k__ # define LG_QUANTUM 3 # endif -# ifdef __powerpc__ +# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__ppc64__) # define LG_QUANTUM 4 # endif # if defined(__riscv) || defined(__riscv__) From df7ad8a9b6121c5c4b15bad5606b51bf734416a6 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 7 Jun 2022 12:25:44 -0700 Subject: [PATCH 005/395] Revert "Echo installed files via verbose 'install' command" This reverts commit f15d8f3b416f6812ac030bc1a7aacf05927a4d7f. "install -v" turned out to be not portable and not work on NetBSD. --- Makefile.in | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Makefile.in b/Makefile.in index 1193cd85..6809fb29 100644 --- a/Makefile.in +++ b/Makefile.in @@ -555,18 +555,20 @@ endif install_bin: $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - $(INSTALL) -v -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - $(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) $(INSTALL) -d $(LIBDIR) - $(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif @@ -574,13 +576,15 @@ endif install_lib_static: $(STATIC_LIBS) $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - $(INSTALL) -v -m 755 $$l $(LIBDIR); \ + echo 
"$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - $(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done ifeq ($(enable_shared), 1) @@ -594,13 +598,15 @@ install_lib: install_lib_pc install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - $(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - $(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man From df8f7d10af15d549ab73ba807b2e14a9d7fe1cc2 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 20 May 2022 20:14:33 +0100 Subject: [PATCH 006/395] Implement malloc_getcpu for amd64 and arm64 macOS This enables per CPU arena on MacOS --- configure.ac | 17 +++++++++++++++++ .../internal/jemalloc_internal_inlines_a.h | 9 +++++++++ 2 files changed, 26 insertions(+) diff --git a/configure.ac b/configure.ac index 8248f52d..66eb7c91 100644 --- a/configure.ac +++ b/configure.ac @@ -510,6 +510,23 @@ typedef unsigned __int32 uint32_t; else AC_MSG_ERROR([cannot determine number of significant virtual address bits]) fi + AC_CACHE_CHECK([rdtscp support], + [je_cv_rdtscp], + AC_RUN_IFELSE([AC_LANG_PROGRAM( [[ #include ]], [[ unsigned int dx; asm volatile("rdtscp" : "=d"(dx) ::); return 0; ]])], + [je_cv_rdtscp=yes], + [je_cv_rdtscp=no], + [je_cv_rdtscp=no])) + if test "x${je_cv_rdtscp}" = "xyes"; then + AC_DEFINE([HAVE_RDTSCP], 1, []) + fi fi ;; *) diff --git 
a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 9e27cc30..7686a9b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -14,6 +14,15 @@ malloc_getcpu(void) { return GetCurrentProcessorNumber(); #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); +#elif defined(HAVE_RDTSCP) + unsigned int ax, cx, dx; + asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); + return (malloc_cpuid_t)(dx & 0xfff); +#elif defined(__aarch64__) && defined(__APPLE__) + /* Other oses most likely use tpidr_el0 instead */ + uintptr_t c; + asm volatile("mrs %x0, tpidrro_el0" : "=r"(c) :: "memory"); + return (malloc_cpuid_t)(c & (1 << 3) - 1); #else not_reached(); return -1; From b950934916b2973fd4131ebfb684e53df305001a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Jun 2022 14:24:55 -0700 Subject: [PATCH 007/395] Enable retain by default on macOS. High number of mappings result in unusually high fork() cost on macOS. Retain fixes the issue, at a small cost of extra VM space reserved. --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 66eb7c91..917d9a80 100644 --- a/configure.ac +++ b/configure.ac @@ -671,6 +671,9 @@ case "${host}" in SOREV="${rev}.${so}" sbrk_deprecated="1" SYM_PREFIX="_" + if test "${LG_SIZEOF_PTR}" = "3"; then + default_retain="1" + fi ;; *-*-freebsd*) JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) From 4fc5c4fbac156c9f44452d3f30216451711dfa18 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 1 Jun 2022 22:04:11 +0100 Subject: [PATCH 008/395] New configure option '--enable-pageid' for Linux The option makes jemalloc use prctl with PR_SET_VMA to tag memory mappings with "jemalloc_pg" or "jemalloc_pg_overcommit". This allows to easily identify jemalloc's mappings in /proc//maps. PR_SET_VMA is only available in Linux 5.17 and above. 
--- configure.ac | 25 +++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ src/pages.c | 28 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/configure.ac b/configure.ac index 917d9a80..0ae579ee 100644 --- a/configure.ac +++ b/configure.ac @@ -2067,6 +2067,14 @@ if test "x$have_memcntl" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ]) fi +AC_CHECK_FUNC([prctl], + [have_prctl="1"], + [have_prctl="0"], + ) +if test "x$have_prctl" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_PRCTL], [ ], [ ]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], @@ -2435,6 +2443,22 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) fi +dnl Do not enable page id (named pages) by default. +AC_ARG_ENABLE([pageid], + [AS_HELP_STRING([--enable-pageid], + [Enable named pages])], +[if test "x$enable_pageid" = "xno" ; then + enable_pageid="0" +else + enable_pageid="1" +fi +], +[enable_pageid="0"] +) +if test "x$enable_pageid" = "x1" ; then + AC_DEFINE([JEMALLOC_PAGEID], [ ], [ ]) +fi + dnl ============================================================================ dnl Enable background threads if possible. @@ -2691,5 +2715,6 @@ AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([log : ${enable_log}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) +AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 888ef470..6dbd8780 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -162,6 +162,12 @@ /* Use gcc intrinsics for profile backtracing if defined. 
*/ #undef JEMALLOC_PROF_GCC +/* JEMALLOC_PAGEID enabled page id */ +#undef JEMALLOC_PAGEID + +/* JEMALLOC_HAVE_PRCTL checks prctl */ +#undef JEMALLOC_HAVE_PRCTL + /* * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). diff --git a/src/pages.c b/src/pages.c index 8c83a7de..b672e4de 100644 --- a/src/pages.c +++ b/src/pages.c @@ -21,6 +21,13 @@ #else #define PAGES_FD_TAG -1 #endif +#ifdef JEMALLOC_HAVE_PRCTL +#include +#ifndef PR_SET_VMA +#define PR_SET_VMA 0x53564d41 +#define PR_SET_VMA_ANON_NAME 0 +#endif +#endif /******************************************************************************/ /* Data. */ @@ -98,6 +105,22 @@ static int madvise_MADV_DONTNEED_zeroes_pages() } #endif +#ifdef JEMALLOC_PAGEID +static int os_page_id(void *addr, size_t size, const char *name) +{ +#ifdef JEMALLOC_HAVE_PRCTL + /* + * While parsing `/proc//maps` file, the block could appear as + * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` + */ + return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, + (uintptr_t)name); +#else + return 0; +#endif +} +#endif + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -162,6 +185,11 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { #endif assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); +#ifdef JEMALLOC_PAGEID + int n = os_page_id(ret, size, + os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); + assert(n == 0 || (n == -1 && get_errno() == EINVAL)); +#endif return ret; } From 3713932836db1190ebadd4a0643db2d354b84fa3 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 13 Jun 2022 15:32:33 -0700 Subject: [PATCH 009/395] Update building for Windows instructions Explain how to build for Windows in INSTALL.md and remove another readme.txt in an obscure location. 
--- INSTALL.md | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ msvc/ReadMe.txt | 23 -------------- 2 files changed, 79 insertions(+), 23 deletions(-) delete mode 100644 msvc/ReadMe.txt diff --git a/INSTALL.md b/INSTALL.md index 90da718d..9dffa646 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -396,6 +396,85 @@ exclusively): Use this to search for programs used during configuration and building. +## Building for Windows + +There are at least two ways to build jemalloc's libraries for Windows. They +differ in their ease of use and flexibility. + +### With MSVC solutions +This is the easy, but less flexible approach. It doesn't let you specify +arguments to the `configure` script. + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 or 2017 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "x64 Native Tools Command Prompt for VS 2017" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "CC=cl ./autogen.sh" + +6. Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2017.sln + +### With MSYS +This is a more involved approach that offers the same configuration flexibility +as Linux builds. We use it for our CI workflow to test different jemalloc +configurations on Windows. + +1. Install the prerequisites + 1. MSYS2 + 2. Chocolatey + 3. Visual Studio if you want to compile with MSVC compiler + +2. Run your bash emulation. It could be MSYS2 or Git Bash (this manual was + tested on both) +3. Manually and selectively follow + [before_install.sh](https://github.com/jemalloc/jemalloc/blob/dev/scripts/windows/before_install.sh) + script. + 1. Skip the `TRAVIS_OS_NAME` check, `rm -rf C:/tools/msys64` and `choco + uninstall/upgrade` part. + 2. If using `msys2` shell, add path to `RefreshEnv.cmd` to `PATH`: + `PATH="$PATH:/c/ProgramData/chocolatey/bin"` + 3. 
Assign `msys_shell_cmd`, `msys2`, `mingw32` and `mingw64` as in the + script. + 4. Pick `CROSS_COMPILE_32BIT` , `CC` and `USE_MSVC` values depending on + your needs. For instance, if you'd like to build for x86_64 Windows + with `gcc`, then `CROSS_COMPILE_32BIT="no"`, `CC="gcc"` and + `USE_MSVC=""`. If you'd like to build for x86 Windows with `cl.exe`, + then `CROSS_COMPILE_32BIT="yes"`, `CC="cl.exe"`, `USE_MSVC="x86"`. + For x86_64 builds with `cl.exe`, assign `USE_MSVC="amd64"` and + `CROSS_COMPILE_32BIT="no"`. + 5. Replace the path to `vcvarsall.bat` with the path on your system. For + instance, on my Windows PC with Visual Studio 17, the path is + `C:\Program Files (x86)\Microsoft Visual + Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`. + 6. Execute the rest of the script. It will install the required + dependencies and assign the variable `build_env`, which is a function + that executes following commands with the correct environment + variables set. +4. Use `$build_env ` as you would in a Linux shell: + 1. `$build_env autoconf` + 2. `$build_env ./configure CC="" ` + 3. `$build_env mingw32-make` + +If you're having any issues with the above, ensure the following: + +5. When you run `cmd //C RefreshEnv.cmd`, you get an output line starting with + `Refreshing` . If it errors saying `RefreshEnv.cmd` is not found, then you + need to add it to your `PATH` as described above in item 3.2 + +6. When you run `cmd //C $vcvarsall`, it prints a bunch of environment + variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall` + script and fix it. ## Development diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt deleted file mode 100644 index 633a7d49..00000000 --- a/msvc/ReadMe.txt +++ /dev/null @@ -1,23 +0,0 @@ - -How to build jemalloc for Windows -================================= - -1. Install Cygwin with at least the following packages: - * autoconf - * autogen - * gawk - * grep - * sed - -2. 
Install Visual Studio 2015 or 2017 with Visual C++ - -3. Add Cygwin\bin to the PATH environment variable - -4. Open "x64 Native Tools Command Prompt for VS 2017" - (note: x86/x64 doesn't matter at this point) - -5. Generate header files: - sh -c "CC=cl ./autogen.sh" - -6. Now the project can be opened and built in Visual Studio: - msvc\jemalloc_vc2017.sln From a9215bf18aed1a1e59cbc7dfb9a0f018697d491d Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 18 Jun 2022 17:17:26 +0100 Subject: [PATCH 010/395] CI update FreeBSD version. --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 75695398..a68f3dc1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -29,7 +29,7 @@ task: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: - image: freebsd-12-3-release-amd64 + image: freebsd-13-0-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From cb578bbe01326bfc4a7b676f6921189d84518f03 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 3 Jul 2022 20:23:59 +0300 Subject: [PATCH 011/395] Fix possible "nmalloc >= ndalloc" assertion In arena_stats_merge() first nmalloc was read, and after ndalloc. However with this order, it is possible for some thread to incement ndalloc in between, and then nmalloc < ndalloc, and assertion will fail, like again found by ClickHouse CI [1] (even after #2234). [1]: https://github.com/ClickHouse/ClickHouse/issues/31531 Swap the order to avoid possible assertion. 
Cc: @interwq Follow-up for: #2234 --- src/arena.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 857b27c5..1ab2775e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -106,18 +106,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->metadata_thp += metadata_thp; for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { - uint64_t nmalloc = locked_read_u64(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[i].nmalloc); - locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); - astats->nmalloc_large += nmalloc; - + /* ndalloc should be read before nmalloc, + * since otherwise it is possible for ndalloc to be incremented, + * and the following can become true: ndalloc > nmalloc */ uint64_t ndalloc = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); astats->ndalloc_large += ndalloc; + uint64_t nmalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); + astats->nmalloc_large += nmalloc; + uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); From 41a859ef7325569c6c25f92d294d45123bb81355 Mon Sep 17 00:00:00 2001 From: Jasmin Parent Date: Sat, 2 Jul 2022 14:44:46 -0400 Subject: [PATCH 012/395] Remove duplicated words in documentation --- doc/jemalloc.xml.in | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e28e8f38..98f86f95 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1121,9 +1121,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages.for a description of muzzy pages. 
Note - that when the oversize_threshold + for a description of muzzy pages. Note that when the oversize_threshold feature is enabled, the arenas reserved for oversize requests may have its own default decay settings. From a1c7d9c046c2a90b978dc409d366b89303c96ab6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Jul 2022 12:08:10 -0700 Subject: [PATCH 013/395] Add the missing opt.cache_oblivious handling. --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 68db1f36..7ccbf8ac 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1220,6 +1220,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { int m; From 58478412be842e140cc03dbb0c6ce84b2b8d096e Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 7 Jun 2022 21:43:08 +0100 Subject: [PATCH 014/395] OpenBSD build fix. still no cpu affinity. - enabling pthread_get/pthread_set_name_np api. - disabling per thread cpu affinity handling, unsupported on this platform. 
--- include/jemalloc/internal/jemalloc_internal_decls.h | 2 +- include/jemalloc/jemalloc_macros.h.in | 2 +- src/background_thread.c | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 983027c8..77ba1c9a 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -32,7 +32,7 @@ # include # endif # include -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) # include # include # if defined(__FreeBSD__) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index ebb3137e..2de3f27d 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW diff --git a/src/background_thread.c b/src/background_thread.c index 3bb8d26c..f22174d6 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -80,6 +80,9 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { +#ifdef __OpenBSD__ + return false; +#else #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; #else @@ -110,6 +113,7 @@ set_current_thread_affinity(int cpu) { # endif return ret != 0; #endif +#endif } #define BILLION UINT64_C(1000000000) From 4e12d21c8ddb9a70a12c8194c8b6c331fad7154a Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 11 Jun 2022 07:04:26 +0100 Subject: [PATCH 015/395] enabled percpu_arena settings on macOs. 
follow-up on #2280 --- include/jemalloc/internal/jemalloc_preamble.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 5ce77d96..d7086302 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -215,7 +215,7 @@ static const bool config_enable_cxx = #endif ; -#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) +#if defined(_WIN32) || defined(__APPLE__) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif From adc70c051135ac8909ca37492d7b104150077033 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 11 Jun 2022 07:57:22 +0100 Subject: [PATCH 016/395] update travis --- .travis.yml | 3 +++ scripts/gen_travis.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bf44fad4..49e6aa7e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -376,6 +376,9 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 4366a066..b49905f9 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -257,7 +257,6 @@ def generate_macos(arch): exclude = ([Option.as_malloc_conf(opt) for opt in ( 'dss:primary', - 
'percpu_arena:percpu', 'background_thread:true')] + [Option.as_configure_flag('--enable-prof')] + [CLANG,]) From 36366f3c4c741723369853c923e56999716398fc Mon Sep 17 00:00:00 2001 From: Ivan Zaitsev Date: Wed, 20 Jul 2022 15:25:56 -0700 Subject: [PATCH 017/395] Add double free detection in thread cache for debug build Add new runtime option `debug_double_free_max_scan` that specifies the max number of stack entries to scan in the cache bit when trying to detect the double free bug (currently debug build only). --- include/jemalloc/internal/cache_bin.h | 34 +++++++++++++ .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/safety_check.h | 2 + src/ctl.c | 7 ++- src/jemalloc.c | 11 +++++ src/stats.c | 1 + test/unit/double_free.c | 49 ++++++++++++++++--- test/unit/mallctl.c | 1 + 8 files changed, 97 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index caf5be33..87c7ea5e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_CACHE_BIN_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" /* @@ -427,6 +428,35 @@ cache_bin_full(cache_bin_t *bin) { return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } +/* + * Scans the allocated area of the cache_bin for the given pointer up to limit. + * Fires safety_check_fail if the ptr is found and returns true. + */ +JEMALLOC_ALWAYS_INLINE bool +cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { + if (!config_debug || opt_debug_double_free_max_scan == 0) { + return false; + } + + cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin, false); + unsigned max_scan = opt_debug_double_free_max_scan < ncached + ? 
opt_debug_double_free_max_scan + : ncached; + + void **cur = bin->stack_head; + void **limit = cur + max_scan; + for (; cur < limit; cur++) { + if (*cur == ptr) { + safety_check_fail( + "Invalid deallocation detected: double free of " + "pointer %p\n", + ptr); + return true; + } + } + return false; +} + /* * Free an object into the given bin. Fails only if the bin is full. */ @@ -436,6 +466,10 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return false; } + if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { + return true; + } + bin->stack_head--; *bin->stack_head = ptr; cache_bin_assert_earlier(bin, bin->low_bits_full, diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fc834c67..63b9bd2c 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -34,6 +34,7 @@ extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; +extern unsigned opt_debug_double_free_max_scan; /* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). 
*/ extern uintptr_t san_cache_bin_nonfast_mask; diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index f1a74f17..900cfa55 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H +#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); diff --git a/src/ctl.c b/src/ctl.c index 135271ba..e942cb1a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -92,6 +92,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_cache_oblivious) +CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) @@ -479,7 +480,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)} + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("debug_double_free_max_scan"), + CTL(opt_debug_double_free_max_scan)} }; static const ctl_named_node_t tcache_node[] = { @@ -2128,6 +2131,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) +CTL_RO_NL_GEN(opt_debug_double_free_max_scan, + opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7ccbf8ac..83d69dd0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -154,6 +154,9 @@ fxp_t opt_narenas_ratio = FXP_INIT_INT(4); 
unsigned ncpus; +unsigned opt_debug_double_free_max_scan = + SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; + /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; @@ -1420,6 +1423,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, + "debug_double_free_max_scan", 0, UINT_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains @@ -1737,6 +1744,10 @@ malloc_conf_init_check_deps(void) { "prof_final.\n"); return true; } + /* To emphasize in the stats output that opt is disabled when !debug. */ + if (!config_debug) { + opt_debug_double_free_max_scan = 0; + } return false; } diff --git a/src/stats.c b/src/stats.c index efc70fd3..d150baef 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1518,6 +1518,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") OPT_WRITE_UNSIGNED("lg_tcache_flush_small_div") OPT_WRITE_UNSIGNED("lg_tcache_flush_large_div") + OPT_WRITE_UNSIGNED("debug_double_free_max_scan") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/test/unit/double_free.c b/test/unit/double_free.c index 12122c1b..b52fcf90 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -10,13 +10,13 @@ void fake_abort(const char *message) { } void -test_large_double_free_pre(void) { +test_double_free_pre(void) { safety_check_set_abort(&fake_abort); fake_abort_called = false; } void -test_large_double_free_post() { +test_double_free_post() { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } @@ -29,7 +29,7 @@ TEST_BEGIN(test_large_double_free_tcache) { */ test_skip_if(config_debug); - test_large_double_free_pre(); + test_double_free_pre(); char *ptr 
= malloc(SC_LARGE_MINCLASS); bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); @@ -44,7 +44,7 @@ TEST_BEGIN(test_large_double_free_tcache) { fake_abort_called = true; } mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); - test_large_double_free_post(); + test_double_free_post(); } TEST_END @@ -52,7 +52,7 @@ TEST_BEGIN(test_large_double_free_no_tcache) { test_skip_if(!config_opt_safety_checks); test_skip_if(config_debug); - test_large_double_free_pre(); + test_double_free_pre(); char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); bool guarded = extent_is_guarded(tsdn_fetch(), ptr); dallocx(ptr, MALLOCX_TCACHE_NONE); @@ -66,12 +66,45 @@ TEST_BEGIN(test_large_double_free_no_tcache) { */ fake_abort_called = true; } - test_large_double_free_post(); + test_double_free_post(); +} +TEST_END + +TEST_BEGIN(test_small_double_free_tcache) { + test_skip_if(!config_debug); + + test_skip_if(opt_debug_double_free_max_scan == 0); + + bool tcache_enabled; + size_t sz = sizeof(tcache_enabled); + assert_d_eq( + mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + test_skip_if(!tcache_enabled); + + test_double_free_pre(); + char *ptr = malloc(1); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + free(ptr); + if (!guarded) { + free(ptr); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. 
+ */ + fake_abort_called = true; + } + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + test_double_free_post(); } TEST_END int main(void) { - return test(test_large_double_free_no_tcache, - test_large_double_free_tcache); + return test( + test_large_double_free_no_tcache, + test_large_double_free_tcache, + test_small_double_free_tcache); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6efc8f1b..62bd1a2d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -325,6 +325,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); + TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); #undef TEST_MALLCTL_OPT } From 42daa1ac4405a06ed79f68dc2c0ca8c5ad477ecd Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 9 Aug 2022 16:39:02 -0700 Subject: [PATCH 018/395] Add double free detection using slab bitmap for debug build Add a sanity check for double free issue in the arena in case that the tcache has been flushed. --- include/jemalloc/internal/arena_inlines_b.h | 71 ++++++++++++++++----- test/unit/double_free.c | 50 ++++++++++++--- 2 files changed, 96 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fa81537c..69617fb7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -298,6 +298,54 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } } +/* Find the region index of a pointer. */ +JEMALLOC_ALWAYS_INLINE size_t +arena_slab_regind_impl(div_info_t* div_info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. 
*/ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % + (uintptr_t)bin_infos[binind].reg_size == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +/* Checks whether ptr is currently active in the arena. */ +JEMALLOC_ALWAYS_INLINE bool +arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { + if (!config_debug) { + return false; + } + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); + div_info_t div_info = arena_binind_div_info[binind]; + /* + * Calls the internal function arena_slab_regind_impl because the + * safety check does not require a lock. + */ + size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); + slab_data_t *slab_data = edata_slab_data_get(edata); + const bin_info_t *bin_info = &bin_infos[binind]; + assert(edata_nfree_get(edata) < bin_info->nregs); + if (unlikely(!bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, + regind))) { + safety_check_fail( + "Invalid deallocation detected: the pointer being freed (%p) not " + "currently active, possibly caused by double free bugs.\n", ptr); + return true; + } + return false; +} + JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { @@ -328,6 +376,9 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ + if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { + return; + } tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { @@ -415,6 +466,9 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (likely(alloc_ctx.slab)) { /* Small allocation. */ + if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { + return; + } tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { @@ -465,22 +519,7 @@ struct arena_dalloc_bin_locked_info_s { JEMALLOC_ALWAYS_INLINE size_t arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. 
*/ - regind = div_compute(&info->div_info, diff); - - assert(regind < bin_infos[binind].nregs); - + size_t regind = arena_slab_regind_impl(&info->div_info, binind, slab, ptr); return regind; } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index b52fcf90..e73efe71 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -21,6 +21,15 @@ test_double_free_post() { safety_check_set_abort(NULL); } +bool tcache_enabled() { + bool enabled; + size_t sz = sizeof(enabled); + assert_d_eq( + mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + return enabled; +} + TEST_BEGIN(test_large_double_free_tcache) { test_skip_if(!config_opt_safety_checks); /* @@ -72,15 +81,8 @@ TEST_END TEST_BEGIN(test_small_double_free_tcache) { test_skip_if(!config_debug); - test_skip_if(opt_debug_double_free_max_scan == 0); - - bool tcache_enabled; - size_t sz = sizeof(tcache_enabled); - assert_d_eq( - mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - test_skip_if(!tcache_enabled); + test_skip_if(!tcache_enabled()); test_double_free_pre(); char *ptr = malloc(1); @@ -101,10 +103,40 @@ TEST_BEGIN(test_small_double_free_tcache) { } TEST_END +TEST_BEGIN(test_small_double_free_arena) { + test_skip_if(!config_debug); + test_skip_if(!tcache_enabled()); + + test_double_free_pre(); + /* + * Allocate one more pointer to keep the slab partially used after + * flushing the cache. + */ + char *ptr1 = malloc(1); + char *ptr = malloc(1); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + free(ptr); + if (!guarded) { + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + free(ptr); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. 
+ */ + fake_abort_called = true; + } + test_double_free_post(); + free(ptr1); +} +TEST_END + int main(void) { return test( test_large_double_free_no_tcache, test_large_double_free_tcache, - test_small_double_free_tcache); + test_small_double_free_tcache, + test_small_double_free_arena); } From ce29b4c3d9256956a8d60302b5d1fa72c3479686 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 12 Aug 2022 11:31:07 -0700 Subject: [PATCH 019/395] Refactor the remote / cross thread cache bin stats reading Refactored cache_bin.h so that only one function is racy. --- include/jemalloc/internal/cache_bin.h | 100 +++++++++++++------------- src/cache_bin.c | 3 +- 2 files changed, 51 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 87c7ea5e..ee8b1ae2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -195,27 +195,18 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline uint16_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) { - /* - * When it's racy, bin->low_bits_full can be modified concurrently. It - * can cross the uint16_t max value and become less than - * bin->low_bits_empty at the time of the check. - */ - if (!racy) { - cache_bin_assert_earlier(bin, earlier, later); - } +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + cache_bin_assert_earlier(bin, earlier, later); return later - earlier; } /* * Number of items currently cached in the bin, without checking ncached_max. - * We require specifying whether or not the request is racy or not (i.e. whether - * or not concurrent modifications are possible). 
*/ static inline cache_bin_sz_t -cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { +cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -226,7 +217,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { * fast paths. This should still be "safe" in the sense of generating * the correct assembly for the foreseeable future, though. */ - assert(n == 0 || *(bin->stack_head) != NULL || racy); + assert(n == 0 || *(bin->stack_head) != NULL); return n; } @@ -237,8 +228,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { */ static inline cache_bin_sz_t cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -254,8 +244,7 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, - /* racy */ false); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -312,7 +301,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty, /* racy */ false) / sizeof(void *); + bin->low_bits_empty) / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. 
*/ @@ -339,7 +328,7 @@ cache_bin_low_water_set(cache_bin_t *bin) { static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { - if (cache_bin_ncached_get_internal(bin, /* racy */ false) + if (cache_bin_ncached_get_internal(bin) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); } @@ -411,8 +400,7 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); if (n > num) { n = (cache_bin_sz_t)num; } @@ -438,7 +426,7 @@ cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { return false; } - cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin, false); + cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); unsigned max_scan = opt_debug_double_free_max_scan < ncached ? opt_debug_double_free_max_scan : ncached; @@ -488,8 +476,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Stash at the full position, in the [full, head) range. */ uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. */ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head, - /* racy */ false); + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); *(void **)((uintptr_t)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -499,46 +486,35 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { return true; } -/* - * Get the number of stashed pointers. - * - * When called from a thread not owning the TLS (i.e. racy = true), it's - * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can - * be modified concurrently and almost none assertions about their values can be - * made. - */ +/* Get the number of stashed pointers. 
*/ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, - bool racy) { +cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, - bin->low_bits_full, racy) / sizeof(void *); + bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); - if (!racy) { - /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned)); - } + assert(n == 0 || (stashed != NULL && aligned)); return n; } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, - /* racy */ false); + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -546,15 +522,39 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { /* * Obtain a racy view of the number of items currently in the cache bin, in the * presence of possible concurrent modifications. + * + * Note that this is the only racy function in this header. Any other functions + * are assumed to be non-racy. 
The "racy" term here means accessed from another + * thread (that is not the owner of the specific cache bin). This only happens + * when gathering stats (read-only). The only change because of the racy + * condition is that assertions based on mutable fields are omitted. + * + * It's important to keep in mind that 'bin->stack_head' and + * 'bin->low_bits_full' can be modified concurrently and almost no assertions + * about their values can be made. + * + * This function should not call other utility functions because the racy + * condition may cause unexpected / undefined behaviors in unverified utility + * functions. Currently, this function calls two utility functions + * cache_bin_info_ncached_max and cache_bin_low_bits_low_bound_get because they + * help access values that will not be concurrently modified. */ static inline void cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true); + /* Racy version of cache_bin_ncached_get_internal. */ + cache_bin_sz_t diff = bin->low_bits_empty - + (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t n = diff / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); *ncached = n; - n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true); + /* Racy version of cache_bin_nstashed_get_internal. */ + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, + info); + n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); *nstashed = n; /* Note that cannot assert ncached + nstashed <= ncached_max (racy). 
*/ diff --git a/src/cache_bin.c b/src/cache_bin.c index 9ae072a0..a4c22bd7 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -84,8 +84,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head, - /* racy */ false); + bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); From 56ddbea270e5c73ba5a4977550e02c2b3706ae80 Mon Sep 17 00:00:00 2001 From: Abael He Date: Thu, 25 Aug 2022 11:12:08 +0800 Subject: [PATCH 020/395] error: implicit declaration of function 'pthread_create_fptr_init' is invalid in C99 ./autogen.sh \ && ./configure --prefix=/usr/local --enable-static --enable-autogen --enable-xmalloc --with-static-libunwind=/usr/local/lib/libunwind.a --enable-lazy-lock --with-jemalloc-prefix='' \ && make -j16 ... gcc -std=gnu11 -Werror=unknown-warning-option -Wall -Wextra -Wshorten-64-to-32 -Wsign-compare -Wundef -Wno-format-zero-length -Wpointer-arith -Wno-missing-braces -Wno-missing-field-initializers -pipe -g3 -Wimplicit-fallthrough -O3 -funroll-loops -fPIC -DPIC -c -D_REENTRANT -Iinclude -Iinclude -DJEMALLOC_NO_PRIVATE_NAMESPACE -o src/edata_cache.sym.o src/edata_cache.c src/background_thread.c:768:6: error: implicit declaration of function 'pthread_create_fptr_init' is invalid in C99 [-Werror,-Wimplicit-function-declaration] pthread_create_fptr_init()) { ^ src/background_thread.c:768:6: note: did you mean 'pthread_create_wrapper_init'? src/background_thread.c:34:1: note: 'pthread_create_wrapper_init' declared here pthread_create_wrapper_init(void) { ^ 1 error generated. make: *** [src/background_thread.sym.o] Error 1 make: *** Waiting for unfinished jobs.... 
--- src/background_thread.c | 66 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index f22174d6..3171dd31 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -46,6 +46,39 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, return pthread_create_fptr(thread, attr, start_routine, arg); } + +#ifdef JEMALLOC_HAVE_DLSYM +#include +#endif + +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + /* + * Try the next symbol first, because 1) when use lazy_lock we have a + * wrapper for pthread_create; and 2) application may define its own + * wrapper as well (and can call malloc within the wrapper). + */ +#ifdef JEMALLOC_HAVE_DLSYM + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); +#else + pthread_create_fptr = NULL; +#endif + if (pthread_create_fptr == NULL) { + if (config_lazy_lock) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } else { + /* Fall back to the default symbol. */ + pthread_create_fptr = pthread_create; + } + } + + return false; +} #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */ #ifndef JEMALLOC_BACKGROUND_THREAD @@ -710,39 +743,6 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS -#ifdef JEMALLOC_HAVE_DLSYM -#include -#endif - -static bool -pthread_create_fptr_init(void) { - if (pthread_create_fptr != NULL) { - return false; - } - /* - * Try the next symbol first, because 1) when use lazy_lock we have a - * wrapper for pthread_create; and 2) application may define its own - * wrapper as well (and can call malloc within the wrapper). 
- */ -#ifdef JEMALLOC_HAVE_DLSYM - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); -#else - pthread_create_fptr = NULL; -#endif - if (pthread_create_fptr == NULL) { - if (config_lazy_lock) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } else { - /* Fall back to the default symbol. */ - pthread_create_fptr = pthread_create; - } - } - - return false; -} - /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. This is called early in ctl (instead of From a0734fd6ee326cd2059edbe4bca7092988a63684 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 19 Aug 2022 12:17:10 -0700 Subject: [PATCH 021/395] Making jemalloc max stack depth a runtime option --- include/jemalloc/internal/prof_externs.h | 1 + include/jemalloc/internal/prof_structs.h | 2 +- include/jemalloc/internal/prof_types.h | 7 +- src/ctl.c | 3 + src/jemalloc.c | 3 + src/prof.c | 1 + src/prof_data.c | 7 +- src/prof_sys.c | 152 ++++++++++++++++++++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + test/unit/prof_hook.c | 1 + test/unit/prof_hook.sh | 3 +- 12 files changed, 171 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index bdff1349..d1101561 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,6 +7,7 @@ extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; +extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. 
*/ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index dd22115f..9331fba4 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -202,7 +202,7 @@ struct prof_tdata_s { prof_cnt_t cnt_summed; /* Backtrace vector, used for calls to prof_backtrace(). */ - void *vec[PROF_BT_MAX]; + void **vec; }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ba628654..87cbb4ab 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -23,7 +23,12 @@ typedef struct prof_recent_s prof_recent_t; * is based on __builtin_return_address() necessarily has a hard-coded number * of backtrace frame handlers, and should be kept in sync with this setting. */ -#define PROF_BT_MAX 128 +#ifdef JEMALLOC_PROF_GCC +# define PROF_BT_MAX_LIMIT 256 +#else +# define PROF_BT_MAX_LIMIT UINT_MAX +#endif +#define PROF_BT_MAX_DEFAULT 128 /* Initial hash table size. 
*/ #define PROF_CKH_MINITEMS 64 diff --git a/src/ctl.c b/src/ctl.c index e942cb1a..6b03f986 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -142,6 +142,7 @@ CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) +CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) @@ -468,6 +469,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, + {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, @@ -2205,6 +2207,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 83d69dd0..a4761c9b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1585,6 +1585,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", + 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) diff --git a/src/prof.c b/src/prof.c index 7a6d5d56..3deac0b5 100644 --- a/src/prof.c +++ b/src/prof.c 
@@ -26,6 +26,7 @@ bool opt_prof = false; bool opt_prof_active = true; bool opt_prof_thread_active_init = true; +unsigned opt_prof_bt_max = PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; diff --git a/src/prof_data.c b/src/prof_data.c index bfa55be1..f8b19594 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1167,13 +1167,16 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. */ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + size_t tdata_sz = ALIGNMENT_CEILING(sizeof(prof_tdata_t), QUANTUM); + size_t total_sz = tdata_sz + sizeof(void *) * opt_prof_bt_max; + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), + total_sz, sz_size2index(total_sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { return NULL; } + tdata->vec = (void **)((uintptr_t)tdata + tdata_sz); tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; diff --git a/src/prof_sys.c b/src/prof_sys.c index b5f1f5b2..99fa3a77 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -55,9 +55,9 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(*len == 0); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); - nframes = unw_backtrace(vec, PROF_BT_MAX); + nframes = unw_backtrace(vec, max_len); if (nframes <= 0) { return; } @@ -97,13 +97,14 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); _Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) static void prof_backtrace_impl(void **vec, unsigned 
*len, unsigned max_len) { +/* The input arg must be a constant for __builtin_return_address. */ #define BT_FRAME(i) \ if ((i) < max_len) { \ void *p; \ @@ -122,7 +123,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); BT_FRAME(0) BT_FRAME(1) @@ -264,6 +265,147 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) + BT_FRAME(128) + BT_FRAME(129) + + BT_FRAME(130) + BT_FRAME(131) + BT_FRAME(132) + BT_FRAME(133) + BT_FRAME(134) + BT_FRAME(135) + BT_FRAME(136) + BT_FRAME(137) + BT_FRAME(138) + BT_FRAME(139) + + BT_FRAME(140) + BT_FRAME(141) + BT_FRAME(142) + BT_FRAME(143) + BT_FRAME(144) + BT_FRAME(145) + BT_FRAME(146) + BT_FRAME(147) + BT_FRAME(148) + BT_FRAME(149) + + BT_FRAME(150) + BT_FRAME(151) + BT_FRAME(152) + BT_FRAME(153) + BT_FRAME(154) + BT_FRAME(155) + BT_FRAME(156) + BT_FRAME(157) + BT_FRAME(158) + BT_FRAME(159) + + BT_FRAME(160) + BT_FRAME(161) + BT_FRAME(162) + BT_FRAME(163) + BT_FRAME(164) + BT_FRAME(165) + BT_FRAME(166) + BT_FRAME(167) + BT_FRAME(168) + BT_FRAME(169) + + BT_FRAME(170) + BT_FRAME(171) + BT_FRAME(172) + BT_FRAME(173) + BT_FRAME(174) + BT_FRAME(175) + BT_FRAME(176) + BT_FRAME(177) + BT_FRAME(178) + BT_FRAME(179) + + BT_FRAME(180) + BT_FRAME(181) + BT_FRAME(182) + BT_FRAME(183) + BT_FRAME(184) + BT_FRAME(185) + BT_FRAME(186) + BT_FRAME(187) + BT_FRAME(188) + BT_FRAME(189) + + BT_FRAME(190) + BT_FRAME(191) + BT_FRAME(192) + BT_FRAME(193) + BT_FRAME(194) + BT_FRAME(195) + BT_FRAME(196) + BT_FRAME(197) + BT_FRAME(198) + BT_FRAME(199) + + BT_FRAME(200) + BT_FRAME(201) + BT_FRAME(202) + BT_FRAME(203) + BT_FRAME(204) + BT_FRAME(205) + BT_FRAME(206) + BT_FRAME(207) + BT_FRAME(208) + BT_FRAME(209) + + BT_FRAME(210) + BT_FRAME(211) + BT_FRAME(212) + BT_FRAME(213) + BT_FRAME(214) + BT_FRAME(215) + BT_FRAME(216) + BT_FRAME(217) + BT_FRAME(218) + 
BT_FRAME(219) + + BT_FRAME(220) + BT_FRAME(221) + BT_FRAME(222) + BT_FRAME(223) + BT_FRAME(224) + BT_FRAME(225) + BT_FRAME(226) + BT_FRAME(227) + BT_FRAME(228) + BT_FRAME(229) + + BT_FRAME(230) + BT_FRAME(231) + BT_FRAME(232) + BT_FRAME(233) + BT_FRAME(234) + BT_FRAME(235) + BT_FRAME(236) + BT_FRAME(237) + BT_FRAME(238) + BT_FRAME(239) + + BT_FRAME(240) + BT_FRAME(241) + BT_FRAME(242) + BT_FRAME(243) + BT_FRAME(244) + BT_FRAME(245) + BT_FRAME(246) + BT_FRAME(247) + BT_FRAME(248) + BT_FRAME(249) + + BT_FRAME(250) + BT_FRAME(251) + BT_FRAME(252) + BT_FRAME(253) + BT_FRAME(254) + BT_FRAME(255) #undef BT_FRAME } #else @@ -281,7 +423,7 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { assert(prof_backtrace_hook != NULL); pre_reentrancy(tsd, NULL); - prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); + prof_backtrace_hook(bt->vec, &bt->len, opt_prof_bt_max); post_reentrancy(tsd); } diff --git a/src/stats.c b/src/stats.c index d150baef..5bb1a346 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1521,6 +1521,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("debug_double_free_max_scan") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") + OPT_WRITE_UNSIGNED("prof_bt_max") OPT_WRITE_CHAR_P("prof_prefix") OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 62bd1a2d..14fe7993 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -314,6 +314,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); + TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index 6480d930..fc06d84e 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -129,6 
+129,7 @@ TEST_END TEST_BEGIN(test_prof_dump_hook) { test_skip_if(!config_prof); + expect_u_eq(opt_prof_bt_max, 200, "Unexpected backtrace stack depth"); mock_dump_hook_called = false; diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh index c7ebd8f9..48cd51a5 100644 --- a/test/unit/prof_hook.sh +++ b/test/unit/prof_hook.sh @@ -1,6 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_bt_max:200" fi - From ba19d2cb78176ef715aca461c7a7a7b2afb35772 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Thu, 1 Sep 2022 16:42:56 -0700 Subject: [PATCH 022/395] Add arena-level name. An arena-level name can help identify manual arenas. --- include/jemalloc/internal/arena_externs.h | 2 + include/jemalloc/internal/arena_structs.h | 3 + include/jemalloc/internal/arena_types.h | 2 + src/arena.c | 21 +++++++ src/ctl.c | 67 +++++++++++++++++++++-- src/stats.c | 13 ++++- test/include/test/test.h | 4 +- test/unit/mallctl.c | 43 +++++++++++++++ 8 files changed, 146 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e6fceaaf..9f5c1958 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -88,6 +88,8 @@ ehooks_t *arena_get_ehooks(arena_t *arena); extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_name_get(arena_t *arena, char *name); +void arena_name_set(arena_t *arena, const char *name); ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e2a5a408..e6868fce 100644 --- 
a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -91,6 +91,9 @@ struct arena_s { /* Used to determine uptime. Read-only after initialization. */ nstime_t create_time; + /* The name of the arena. */ + char name[ARENA_NAME_LEN]; + /* * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index d0e12917..45eec69f 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -8,6 +8,8 @@ #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ #define ARENA_DECAY_NTICKS_PER_UPDATE 1000 +/* Maximum length of the arena name. */ +#define ARENA_NAME_LEN 32 typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; diff --git a/src/arena.c b/src/arena.c index 1ab2775e..25ab41af 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1547,6 +1547,22 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { return false; } +void +arena_name_get(arena_t *arena, char *name) { + char *end = (char *)memchr((void *)arena->name, '\0', ARENA_NAME_LEN); + assert(end != NULL); + size_t len = (uintptr_t)end - (uintptr_t)arena->name + 1; + assert(len > 0 && len <= ARENA_NAME_LEN); + + strncpy(name, arena->name, len); +} + +void +arena_name_set(arena_t *arena, const char *name) { + strncpy(arena->name, name, ARENA_NAME_LEN); + arena->name[ARENA_NAME_LEN - 1] = '\0'; +} + ssize_t arena_dirty_decay_ms_default_get(void) { return atomic_load_zd(&dirty_decay_ms_default, ATOMIC_RELAXED); @@ -1670,6 +1686,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_set(ind, arena); arena->ind = ind; + /* Init the name. */ + malloc_snprintf(arena->name, sizeof(arena->name), "%s_%u", + arena_is_auto(arena) ? 
"auto" : "manual", arena->ind); + arena->name[ARENA_NAME_LEN - 1] = '\0'; + nstime_init_update(&arena->create_time); /* diff --git a/src/ctl.c b/src/ctl.c index 6b03f986..acf5d366 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -170,6 +170,7 @@ CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) CTL_PROTO(arena_i_retain_grow_limit) +CTL_PROTO(arena_i_name) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -504,11 +505,12 @@ static const ctl_named_node_t arena_i_node[] = { * Undocumented for now, since we anticipate an arena API in flux after * we cut the last 5-series release. */ - {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, - {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, - {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, + {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, + {NAME("name"), CTL(arena_i_name)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -2983,6 +2985,61 @@ label_return: return ret; } +/* + * When writing, newp should point to a char array storing the name to be set. + * A name longer than ARENA_NAME_LEN will be arbitrarily cut. When reading, + * oldp should point to a char array whose length is no shorter than + * ARENA_NAME_LEN or the length of the name when it was set. 
+ */ +static int +arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + char *name; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind >= + ctl_arenas->narenas) { + ret = EINVAL; + goto label_return; + } + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + /* + * Read the arena name. When reading, the input oldp should + * point to an array with a length no shorter than + * ARENA_NAME_LEN or the length when it was set. + */ + if (*oldlenp != sizeof(char *)) { + ret = EINVAL; + goto label_return; + } + name = *(char **)oldp; + arena_name_get(arena, name); + } + + if (newp != NULL) { + /* Write the arena name. */ + WRITE(name, char *); + if (name == NULL) { + ret = EINVAL; + goto label_return; + } + arena_name_set(arena, name); + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/stats.c b/src/stats.c index 5bb1a346..701a6c86 100644 --- a/src/stats.c +++ b/src/stats.c @@ -42,15 +42,18 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { assert(miblen_new == miblen + 1); \ } while (0) -#define CTL_M2_GET(n, i, v, t) do { \ +#define CTL_MIB_GET(n, i, v, t, ind) do { \ size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ + mib[(ind)] = (i); \ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) +#define CTL_M1_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 1) +#define CTL_M2_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 2) + 
/******************************************************************************/ /* Data. */ @@ -1042,6 +1045,8 @@ JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { + char name[ARENA_NAME_LEN]; + char *namep = name; unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -1059,6 +1064,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, uint64_t uptime; CTL_GET("arenas.page", &page, size_t); + if (i != MALLCTL_ARENAS_ALL && i != MALLCTL_ARENAS_DESTROYED) { + CTL_M1_GET("arena.0.name", i, (void *)&namep, const char *); + emitter_kv(emitter, "name", "name", emitter_type_string, &namep); + } CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); emitter_kv(emitter, "nthreads", "assigned threads", diff --git a/test/include/test/test.h b/test/include/test/test.h index d4b65912..54610dab 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -266,7 +266,7 @@ #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) #define verify_str_eq(may_abort, a, b, ...) do { \ - if (strcmp((a), (b))) { \ + if (strcmp((a), (b)) != 0) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ @@ -284,7 +284,7 @@ } while (0) #define verify_str_ne(may_abort, a, b, ...) 
do { \ - if (!strcmp((a), (b))) { \ + if (strcmp((a), (b)) == 0) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 14fe7993..244d4c96 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -711,6 +711,48 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END +TEST_BEGIN(test_arena_i_name) { + unsigned arena_ind; + size_t ind_sz = sizeof(arena_ind); + size_t mib[3]; + size_t miblen; + char name_old[ARENA_NAME_LEN]; + char *name_oldp = name_old; + size_t sz = sizeof(name_oldp); + char default_name[ARENA_NAME_LEN]; + const char *name_new = "test name"; + const char *super_long_name = "A name longer than ARENA_NAME_LEN"; + size_t super_long_name_len = strlen(super_long_name); + assert(super_long_name_len > ARENA_NAME_LEN); + + miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.name", mib, &miblen), 0, + "Unexpected mallctlnametomib() error"); + + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + mib[1] = arena_ind; + + malloc_snprintf(default_name, sizeof(default_name), "manual_%u", + arena_ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + (void *)&name_new, sizeof(name_new)), 0, + "Unexpected mallctl() failure"); + expect_str_eq(name_old, default_name, + "Unexpected default value for arena name"); + + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + (void *)&super_long_name, sizeof(super_long_name)), 0, + "Unexpected mallctl() failure"); + expect_str_eq(name_old, name_new, "Unexpected value for arena name"); + + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + int cmp = strncmp(name_old, super_long_name, ARENA_NAME_LEN - 1); + expect_true(cmp == 0, "Unexpected value for long arena name "); +} +TEST_END + TEST_BEGIN(test_arena_i_retain_grow_limit) { size_t 
old_limit, new_limit, default_limit; size_t mib[3]; @@ -1258,6 +1300,7 @@ main(void) { test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, + test_arena_i_name, test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, From c9ac1f4701d621c3f39b94970fa96ce23897a295 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 16 Sep 2022 11:39:34 -0700 Subject: [PATCH 023/395] Fix a bug in C++ integration test. --- test/integration/cpp/infallible_new_true.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 3976f08b..3b2862bd 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -10,7 +10,7 @@ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; void fake_abort(const char *message) { const char *expected_start = ": Allocation of size"; - if (strncmp(message, expected_start, strlen(expected_start) != 0)) { + if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); } fake_abort_called = true; @@ -64,4 +64,3 @@ main(void) { return test( test_failing_alloc); } - From c0c9783ec9289e6d1de749ff20081af65bdd78b8 Mon Sep 17 00:00:00 2001 From: Lily Wang <494550702@qq.com> Date: Fri, 9 Sep 2022 02:04:23 -0700 Subject: [PATCH 024/395] Add vcpkg installation instructions --- INSTALL.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index 9dffa646..f772dd09 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -476,6 +476,23 @@ If you're having any issues with the above, ensure the following: variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall` script and fix it. +### Building from vcpkg + +The jemalloc port in vcpkg is kept up to date by Microsoft team members and +community contributors. The url of vcpkg is: https://github.com/Microsoft/vcpkg +. 
You can download and install jemalloc using the vcpkg dependency manager: + +```shell +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh # ./bootstrap-vcpkg.bat for Windows +./vcpkg integrate install +./vcpkg install jemalloc +``` + +If the version is out of date, please [create an issue or pull +request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + ## Development If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' From 3de0c24859f4413bf03448249078169bb50bda0f Mon Sep 17 00:00:00 2001 From: divanorama Date: Thu, 29 Sep 2022 23:35:59 +0200 Subject: [PATCH 025/395] Disable builtin malloc in tests With `--with-jemalloc-prefix=` and without `-fno-builtin` or `-O1` both clang and gcc may optimize out `malloc` calls whose result is unused. Comparing result to NULL also doesn't necessarily count as being used. This won't be a problem in most client programs as this only concerns really unused pointers, but in tests it's important to actually execute allocations. `-fno-builtin` should disable this optimization for both gcc and clang, and applying it only to tests code shouldn't hopefully be an issue. Another alternative is to force "use" of result but that'd require more changes and may miss some other optimization-related issues. 
This should resolve https://github.com/jemalloc/jemalloc/issues/2091 --- Makefile.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.in b/Makefile.in index 6809fb29..a964f07e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -458,6 +458,8 @@ $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_OBJS): CFLAGS += -fno-builtin +$(TESTS_CPP_OBJS): CPPFLAGS += -fno-builtin ifneq ($(IMPORTLIB),$(SO)) $(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif From 4c95c953e2c4b443d930d3b41abb17eb38f075f5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 1 Oct 2022 20:35:29 +0100 Subject: [PATCH 026/395] fix build for non linux/BSD platforms. --- configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/background_thread.c | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 0ae579ee..64c0c847 100644 --- a/configure.ac +++ b/configure.ac @@ -2024,6 +2024,15 @@ if test "x$have_sched_setaffinity" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ]) fi +dnl Check if the pthread_setaffinity_np function exists. +AC_CHECK_FUNC([pthread_setaffinity_np], + [have_pthread_setaffinity_np="1"], + [have_pthread_setaffinity_np="0"] + ) +if test "x$have_pthread_setaffinity_np" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP], [ ], [ ]) +fi + dnl Check if the Solaris/BSD issetugid function exists. 
AC_CHECK_FUNC([issetugid], [have_issetugid="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6dbd8780..f5d94ee7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -391,6 +391,9 @@ /* GNU specific sched_setaffinity support */ #undef JEMALLOC_HAVE_SCHED_SETAFFINITY +/* pthread_setaffinity_np support */ +#undef JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP + /* * If defined, all the features necessary for background threads are present. */ diff --git a/src/background_thread.c b/src/background_thread.c index 3171dd31..3c006cec 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -113,9 +113,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { -#ifdef __OpenBSD__ - return false; -#else +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; #else @@ -146,6 +144,8 @@ set_current_thread_affinity(int cpu) { # endif return ret != 0; #endif +#else + return false; #endif } From b04e7666f2f29de096a170c49cb49cd8f308b7e1 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Thu, 29 Sep 2022 10:07:47 -0400 Subject: [PATCH 027/395] update PROFILING_INTERNALS.md Expand the bad example of summing before unbiasing. --- doc_internal/PROFILING_INTERNALS.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md index 0a9f31c0..f337fb88 100644 --- a/doc_internal/PROFILING_INTERNALS.md +++ b/doc_internal/PROFILING_INTERNALS.md @@ -99,7 +99,25 @@ Using this approach means that there are a few things users need to be aware of. If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. 
Consider the case in which there are only two types of allocating call stacks in a program. Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent. ### Aggregation must be done after unbiasing samples -Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurs of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A. +Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from 1 million machines. We then have 8 million samples of stack A (8 per machine, each of 8 bytes), and 1 million samples of stack B (1 per machine, each of 8 MB). 
+ +If we sum first and then unbias by dividing by the factor $1 - e^{-Z/R}$ we get: + +$$Z = 8,000,000 * 8 bytes = 64MB$$ +$$64MB / (1 - e^{-64MB/1MB}) \approx 64MB (Stack A)$$ + +$$Z = 1,000,000 * 8MB = 8TB$$ +$$8TB / (1 - e^{-8TB/1MB}) \approx 8TB (Stack B)$$ + +Clearly we are unbiasing by an infinitesimal amount, which dramatically underreports the amount of memory allocated by stack A. Whereas if we unbias first and then sum: + +$$Z = 8 bytes$$ +$$8 bytes / (1 - e^{-8 bytes/1MB}) \approx 1MB$$ +$$1MB * 8,000,000 = 8TB (Stack A)$$ + +$$Z = 8MB$$ +$$8MB / (1 - e^{-8MB/1MB}) \approx 8MB$$ +$$8MB * 1,000,000 = 8TB (Stack B)$$ ## An avenue for future exploration While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice.
From 1897f185d2c06307fefc4d8f4512eeb13c474999 Mon Sep 17 00:00:00 2001 From: divanorama Date: Mon, 26 Sep 2022 09:46:34 +0200 Subject: [PATCH 028/395] Fix safety_check segfault in double free test --- include/jemalloc/internal/arena_inlines_b.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 69617fb7..c9d7db86 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -225,8 +225,7 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { edata_state_get(edata) != extent_state_active)) { safety_check_fail("Invalid deallocation detected: " "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", - (uintptr_t)edata_addr_get(edata)); + "possibly caused by double free bugs.", ptr); return true; } size_t input_size = sz_index2size(szind); From be65438f20a5fe4fdc5c5bb2cfa7ba3f0e9da378 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Fri, 14 Oct 2022 14:33:08 -0400 Subject: [PATCH 029/395] jemalloc_internal_types.h: Use alloca if __STDC_NO_VLA__ is defined No currently-available version of Visual Studio C compiler supports variable length arrays, even if it defines __STDC_VERSION__ >= C99. As far as I know Microsoft has no plans to ever support VLAs in MSVC. The C11 standard requires that the __STDC_NO_VLA__ macro be defined if the compiler doesn't support VLAs, so fall back to alloca() if so. 
--- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 62c2b59c..b23a8bed 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -110,7 +110,7 @@ typedef enum malloc_init_e malloc_init_t; (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. */ -#if __STDC_VERSION__ < 199901L +#if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) # ifdef _MSC_VER # include # define alloca _alloca From 143e9c4a2f4eb8916e9802323485fd91260fd17c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Oct 2022 15:10:48 -0700 Subject: [PATCH 030/395] Enable fast thread locals for dealloc-only threads. Previously if a thread does only allocations, it stays on the slow path / minimal initialized state forever. However, dealloc-only is a valid pattern for dedicated reclamation threads -- this means thread cache is disabled (no batched flush) for them, which causes high overhead and contention. Added the condition to fully initialize TSD when a fair amount of dealloc activities are observed. 
--- include/jemalloc/internal/tsd.h | 4 +++ src/tsd.c | 18 ++++++++++- test/unit/tsd.c | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 66d68822..c6bf28fc 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -59,6 +59,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -91,6 +92,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ /* reentrancy_level */ 0, \ + /* min_init_state_nfetched */ 0, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ @@ -177,6 +179,8 @@ void tsd_global_slow_inc(tsdn_t *tsdn); void tsd_global_slow_dec(tsdn_t *tsdn); bool tsd_global_slow(); +#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) + enum { /* Common case --> jnz. */ tsd_state_nominal = 0, diff --git a/src/tsd.c b/src/tsd.c index e8e4f3a3..cef7ba58 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -300,9 +300,25 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); tsd_data_init_nocleanup(tsd); + *tsd_min_init_state_nfetchedp_get(tsd) = 1; } } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { - if (!minimal) { + /* + * If a thread only ever deallocates (e.g. dedicated reclamation + * threads), we want to help it to eventually escape the slow + * path (caused by the minimal initialized state). 
The nfetched + * counter tracks the number of times the tsd has been accessed + * under the min init state, and triggers the switch to nominal + * once reached the max allowed count. + * + * This means at most 128 deallocations stay on the slow path. + * + * Also see comments in free_default(). + */ + uint8_t *nfetched = tsd_min_init_state_nfetchedp_get(tsd); + assert(*nfetched >= 1); + (*nfetched)++; + if (!minimal || *nfetched == TSD_MIN_INIT_STATE_MAX_FETCHED) { /* Switch to fully initialized. */ tsd_state_set(tsd, tsd_state_nominal); assert(*tsd_reentrancy_levelp_get(tsd) >= 1); diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 205d8708..bb5cd9f6 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -136,6 +136,61 @@ TEST_BEGIN(test_tsd_reincarnation) { } TEST_END +static void * +thd_start_dalloc_only(void *arg) { + void **ptrs = (void **)arg; + + tsd_t *tsd = tsd_fetch_min(); + if (tsd_state_get(tsd) != tsd_state_minimal_initialized) { + /* Allocation happened implicitly. */ + expect_u_eq(tsd_state_get(tsd), tsd_state_nominal, + "TSD state should be nominal"); + return NULL; + } + + void *ptr; + for (size_t i = 0; (ptr = ptrs[i]) != NULL; i++) { + /* Offset by 1 because of the manual tsd_fetch_min above. */ + if (i + 1 < TSD_MIN_INIT_STATE_MAX_FETCHED) { + expect_u_eq(tsd_state_get(tsd), + tsd_state_minimal_initialized, + "TSD should be minimal initialized"); + } else { + /* State may be nominal or nominal_slow. 
*/ + expect_true(tsd_nominal(tsd), "TSD should be nominal"); + } + free(ptr); + } + + return NULL; +} + +static void +test_sub_thread_n_dalloc(size_t nptrs) { + void **ptrs = (void **)malloc(sizeof(void *) * (nptrs + 1)); + for (size_t i = 0; i < nptrs; i++) { + ptrs[i] = malloc(8); + } + ptrs[nptrs] = NULL; + + thd_t thd; + thd_create(&thd, thd_start_dalloc_only, (void *)ptrs); + thd_join(thd, NULL); + free(ptrs); +} + +TEST_BEGIN(test_tsd_sub_thread_dalloc_only) { + test_sub_thread_n_dalloc(1); + test_sub_thread_n_dalloc(16); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 2); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 1); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 1); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 2); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED * 2); +} +TEST_END + typedef struct { atomic_u32_t phase; atomic_b_t error; @@ -269,6 +324,7 @@ main(void) { return test_no_reentrancy( test_tsd_main_thread, test_tsd_sub_thread, + test_tsd_sub_thread_dalloc_only, test_tsd_reincarnation, test_tsd_global_slow); } From 481bbfc9906e7744716677edd49d0d6c22556a1a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 Nov 2022 16:09:06 -0700 Subject: [PATCH 031/395] Add a configure option --enable-force-getenv. Allows the use of getenv() rather than secure_getenv() to read MALLOC_CONF. This helps in situations where hosts are under full control, and setting MALLOC_CONF is needed while also setuid. Disabled by default. 
--- configure.ac | 16 ++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++++ src/jemalloc.c | 16 ++++++++++------ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 64c0c847..846a049c 100644 --- a/configure.ac +++ b/configure.ac @@ -1571,6 +1571,22 @@ if test "x$enable_readlinkat" = "x1" ; then fi AC_SUBST([enable_readlinkat]) +dnl Do not force getenv by default +AC_ARG_ENABLE([force-getenv], + [AS_HELP_STRING([--enable-force-getenv], [Use getenv over secure_getenv])], +[if test "x$enable_force_getenv" = "xno" ; then + enable_force_getenv="0" +else + enable_force_getenv="1" +fi +], +[enable_force_getenv="0"] +) +if test "x$enable_force_getenv" = "x1" ; then + AC_DEFINE([JEMALLOC_FORCE_GETENV], [ ], [ ]) +fi +AC_SUBST([force_getenv]) + dnl Avoid extra safety checks by default AC_ARG_ENABLE([opt-safety-checks], [AS_HELP_STRING([--enable-opt-safety-checks], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index f5d94ee7..55938433 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -266,6 +266,12 @@ */ #undef JEMALLOC_READLINKAT +/* + * If defined, use getenv() (instead of secure_getenv() or + * alternatives) to access MALLOC_CONF. + */ +#undef JEMALLOC_FORCE_GETENV + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index a4761c9b..039be40f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -703,16 +703,20 @@ check_entry_exit_locking(tsdn_t *tsdn) { */ static char * -jemalloc_secure_getenv(const char *name) { -#ifdef JEMALLOC_HAVE_SECURE_GETENV - return secure_getenv(name); +jemalloc_getenv(const char *name) { +#ifdef JEMALLOC_FORCE_GETENV + return getenv(name); #else -# ifdef JEMALLOC_HAVE_ISSETUGID +# ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +# else +# ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } -# endif +# endif return getenv(name); +# endif #endif } @@ -1045,7 +1049,7 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { #endif ; - if ((ret = jemalloc_secure_getenv(envname)) != NULL) { + if ((ret = jemalloc_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to the value * of the MALLOC_CONF environment variable. From 14ad8205bf0e23cdc1698f65c4d307753726a6a3 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 3 Nov 2022 18:27:03 -0700 Subject: [PATCH 032/395] Update the ratio display in benchmark In bench.h, specify the ratio as the time consumption ratio and modify the display of the ratio. 
--- test/include/test/bench.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 0397c948..c2f78a71 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -53,7 +53,7 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " - "%s=%"FMTu64"us (%s ns/iter), ratio=1:%s\n", + "%s=%"FMTu64"us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); From 06374d2a6ad525be86e4381b4bb5010fedff3268 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 3 Nov 2022 16:55:15 -0700 Subject: [PATCH 033/395] Benchmark operator delete Added the microbenchmark for operator delete. Also modified bench.h so that it can be used in C++. --- Makefile.in | 16 ++++-- test/include/test/bench.h | 10 ++-- test/include/test/jemalloc_test.h.in | 3 +- test/stress/cpp/microbench.cpp | 83 ++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 10 deletions(-) create mode 100644 test/stress/cpp/microbench.cpp diff --git a/Makefile.in b/Makefile.in index a964f07e..195084d6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -332,10 +332,15 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c +ifeq (@enable_cxx@, 1) +TESTS_STRESS_CPP := $(srcroot)test/stress/cpp/microbench.cpp +else +TESTS_STRESS_CPP := +endif TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ - $(TESTS_ANALYZE) $(TESTS_STRESS) + $(TESTS_ANALYZE) $(TESTS_STRESS) $(TESTS_STRESS_CPP) PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) @@ 
-362,9 +367,10 @@ TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_STRESS_CPP_OBJS := $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \ $(TESTS_STRESS_OBJS) -TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) +TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) $(TESTS_STRESS_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -454,6 +460,7 @@ $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST $(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(TESTS_STRESS_CPP_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_CPP_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include @@ -474,7 +481,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h -$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS) $(TESTS_STRESS_CPP_OBJS): 
$(objroot)include/jemalloc/internal/private_namespace_jet.h $(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) @@ -664,7 +671,7 @@ endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) -tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) +tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: @@ -697,6 +704,7 @@ else endif stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%) check: check_unit check_integration check_integration_decay check_integration_prof clean: diff --git a/test/include/test/bench.h b/test/include/test/bench.h index c2f78a71..7421b4d2 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -23,7 +23,7 @@ fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { uint64_t nsecs_per_iter1000 = nsec1000 / iters; uint64_t intpart = nsecs_per_iter1000 / 1000; uint64_t fracpart = nsecs_per_iter1000 % 1000; - malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%"FMTu64".%03"FMTu64, intpart, + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, intpart, fracpart); } @@ -40,8 +40,8 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, return; } - time_func(&timer_a, nwarmup, niter, func_a); - time_func(&timer_b, nwarmup, niter, func_b); + time_func(&timer_a, nwarmup, niter, (void (*)())func_a); + time_func(&timer_b, nwarmup, niter, (void (*)())func_b); uint64_t usec_a = timer_usec(&timer_a); char buf_a[FMT_NSECS_BUF_SIZE]; @@ -52,8 +52,8 @@ compare_funcs(uint64_t 
nwarmup, uint64_t niter, const char *name_a, fmt_nsecs(usec_b, niter, buf_b); timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " - "%s=%"FMTu64"us (%s ns/iter), time consumption ratio=%s:1\n", + malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 "us (%s ns/iter), " + "%s=%" FMTu64 "us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 3f8c0da7..600d993c 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -88,7 +88,8 @@ static const bool config_debug = * public jemalloc interfaces with jet_ prefixes, so that stress tests can use * a separate allocator for their internal data structures. */ -#elif defined(JEMALLOC_STRESS_TEST) +#elif defined(JEMALLOC_STRESS_TEST) || \ + defined(JEMALLOC_STRESS_CPP_TEST) # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/jemalloc_protos_jet.h" diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp new file mode 100644 index 00000000..65f41dea --- /dev/null +++ b/test/stress/cpp/microbench.cpp @@ -0,0 +1,83 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +static void +malloc_free(void) { + void *p = malloc(1); + expect_ptr_not_null(p, "Unexpected new failure"); + free(p); +} + +static void +new_delete(void) { + auto p = ::operator new(1); + expect_ptr_not_null(p, "Unexpected new failure"); + ::operator delete(p); +} + +static void +malloc_free_array(void) { + void *p = malloc(sizeof(int)*8); + expect_ptr_not_null(p, "Unexpected new[] failure"); + free(p); +} + +static void +new_delete_array(void) { + auto p = new int[8]; + expect_ptr_not_null(p, "Unexpected new[] failure"); + delete[] p; +} + +#if __cpp_sized_deallocation >= 201309 +static void +new_sized_delete(void) { + auto p = ::operator 
new(1); + expect_ptr_not_null(p, "Unexpected new failure"); + ::operator delete(p, 1); +} + +static void +malloc_sdallocx(void) { + void *p = malloc(1); + expect_ptr_not_null(p, "Unexpected new failure"); + sdallocx(p, 1, 0); +} +#endif + +TEST_BEGIN(test_free_vs_delete) { + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free", (void *)malloc_free, + "new_delete", (void *)new_delete); +} +TEST_END + +TEST_BEGIN(test_free_array_vs_delete_array) { + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free_array", (void *)malloc_free_array, + "delete_array", (void *)new_delete_array); +} +TEST_END + + +TEST_BEGIN(test_sized_delete_vs_sdallocx) { +#if __cpp_sized_deallocation >= 201309 + compare_funcs(10*1000*1000, 100*1000*1000, + "new_size_delete", (void *)new_sized_delete, + "malloc_sdallocx", (void *)malloc_sdallocx); +#else + malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ + sized deallocation is not enabled.\n"); +#endif +} +TEST_END + + +int +main() { + return test_no_reentrancy( + test_free_vs_delete, + test_free_array_vs_delete_array, + test_sized_delete_vs_sdallocx); + +} From e8f9f13811c16acb1ab8771fd2ffe4437e1b8620 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 19 Sep 2022 17:05:55 -0700 Subject: [PATCH 034/395] Inline free and sdallocx into operator delete --- .../internal/jemalloc_internal_externs.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 224 ++++++++++++++++++ include/jemalloc/internal/prof_inlines.h | 6 +- src/jemalloc.c | 211 +---------------- src/jemalloc_cpp.cpp | 18 +- test/stress/cpp/microbench.cpp | 7 +- 6 files changed, 241 insertions(+), 228 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 63b9bd2c..d90f6ddb 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -70,7 +70,8 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int 
flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -void je_sdallocx_noflags(void *ptr, size_t size); +void sdallocx_default(void *ptr, size_t size, int flags); +void free_default(void *ptr); void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index b0868b7d..719b8eea 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -7,6 +7,17 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/emap.h" + +/* + * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we + * should have one constant here per magic value there. Note however that the + * representations need not be related. + */ +#define TCACHE_IND_NONE ((unsigned)-1) +#define TCACHE_IND_AUTOMATIC ((unsigned)-2) +#define ARENA_IND_AUTOMATIC ((unsigned)-1) /* * Translating the names of the 'i' functions: @@ -337,4 +348,217 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { return fallback_alloc(size); } +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. */ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc || + likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* + * Should not specify tcache on deallocation path when being + * reentrant. 
+ */ + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; +} + +JEMALLOC_ALWAYS_INLINE bool +maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ sz_size2index(dbg_ctx.szind), + /* input_size */ sz_size2index(alloc_ctx->szind)); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + +JEMALLOC_ALWAYS_INLINE bool +free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { + /* + * free_fastpath do not handle two uncommon cases: 1) sampled profiled + * objects and 2) sampled junk & stash for use-after-free detection. + * Both have special alignments which are used to escape the fastpath. + * + * prof_sample is page-aligned, which covers the UAF check when both + * are enabled (the assertion below). Avoiding redundant checks since + * this is on the fastpath -- at most one runtime branch from this. + */ + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } + + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. 
*/ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } + + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } + + return false; +} + +/* Returns whether or not the free attempt was successful. */ +JEMALLOC_ALWAYS_INLINE +bool free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* + * The tsd_fast() / initialized checks are folded into the branch + * testing (deallocated_after >= threshold) later in this function. + * The threshold will be set to 0 when !tsd_fast. + */ + assert(tsd_fast(tsd) || + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); + + emap_alloc_ctx_t alloc_ctx; + if (!size_hint) { + bool err = emap_alloc_ctx_try_lookup_fast(tsd, + &arena_emap_global, ptr, &alloc_ctx); + + /* Note: profiled objects will have alloc_ctx.slab set */ + if (unlikely(err || !alloc_ctx.slab || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); + } else { + /* + * Check for both sizes that are too large, and for sampled / + * special aligned objects. The alignment check will also check + * for null ptr. + */ + if (unlikely(size > SC_LOOKUP_MAXCLASS || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { + return false; + } + alloc_ctx.szind = sz_size2index_lookup(size); + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = true; + } + /* + * Currently the fastpath only handles small sizes. The branch on + * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking + * tcache szind upper limit (i.e. tcache_maxclass) as well. 
+ */ + assert(alloc_ctx.slab); + + uint64_t deallocated, threshold; + te_free_fastpath_ctx(tsd, &deallocated, &threshold); + + size_t usize = sz_index2size(alloc_ctx.szind); + uint64_t deallocated_after = deallocated + usize; + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. Note that this handles the uninitialized case + * as well (TSD init will be triggered on the non-fastpath). Therefore + * anything depends on a functional TSD (e.g. the alloc_ctx sanity check + * below) needs to be after this branch. + */ + if (unlikely(deallocated_after >= threshold)) { + return false; + } + assert(tsd_fast(tsd)); + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } + + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; + + /* + * If junking were enabled, this is where we would do it. It's not + * though, since we ensured above that we're on the fast path. Assert + * that to double-check. 
+ */ + assert(!opt_junk_free); + + if (!cache_bin_dalloc_easy(bin, ptr)) { + return false; + } + + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; + + return true; +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_impl(void *ptr, size_t size, int flags) { + if (flags != 0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_free_impl(void *ptr) { + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } +} + #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index a8e7e7fb..7d9608b5 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void prof_active_assert() { @@ -227,11 +228,6 @@ prof_sample_align(size_t orig_align) { orig_align; } -JEMALLOC_ALWAYS_INLINE bool -prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; -} - JEMALLOC_ALWAYS_INLINE bool prof_sampled(tsd_t *tsd, const void *ptr) { prof_info_t prof_info; diff --git a/src/jemalloc.c b/src/jemalloc.c index 039be40f..7407022f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2273,15 +2273,6 @@ static_opts_init(static_opts_t *static_opts) { static_opts->usize = false; } -/* - * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we - * should have one constant here per magic value there. 
Note however that the - * representations need not be related. - */ -#define TCACHE_IND_NONE ((unsigned)-1) -#define TCACHE_IND_AUTOMATIC ((unsigned)-2) -#define ARENA_IND_AUTOMATIC ((unsigned)-1) - typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; @@ -2346,36 +2337,6 @@ zero_get(bool guarantee, bool slow) { } } -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { - tcache_t *tcache; - if (tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else if (is_alloc || - likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } else { - /* - * Should not specify tcache on deallocation path when being - * reentrant. - */ - assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if (tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, tcache_ind); - } - } - return tcache; -} - /* Return true if a manual arena is specified and arena_get() OOMs. 
*/ JEMALLOC_ALWAYS_INLINE bool arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { @@ -2915,29 +2876,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { thread_dalloc_event(tsd, usize); } -JEMALLOC_ALWAYS_INLINE bool -maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { - if (config_opt_size_checks) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &dbg_ctx); - if (alloc_ctx->szind != dbg_ctx.szind) { - safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr, - /* true_size */ sz_size2index(dbg_ctx.szind), - /* input_size */ sz_size2index(alloc_ctx->szind)); - return true; - } - if (alloc_ctx->slab != dbg_ctx.slab) { - safety_check_fail( - "Internal heap corruption detected: " - "mismatch in slab bit"); - return true; - } - } - return false; -} - JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -3044,142 +2982,11 @@ free_default(void *ptr) { } } -JEMALLOC_ALWAYS_INLINE bool -free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { - /* - * free_fastpath do not handle two uncommon cases: 1) sampled profiled - * objects and 2) sampled junk & stash for use-after-free detection. - * Both have special alignments which are used to escape the fastpath. - * - * prof_sample is page-aligned, which covers the UAF check when both - * are enabled (the assertion below). Avoiding redundant checks since - * this is on the fastpath -- at most one runtime branch from this. - */ - if (config_debug && cache_bin_nonfast_aligned(ptr)) { - assert(prof_sample_aligned(ptr)); - } - - if (config_prof && check_prof) { - /* When prof is enabled, the prof_sample alignment is enough. 
*/ - if (prof_sample_aligned(ptr)) { - return true; - } else { - return false; - } - } - - if (config_uaf_detection) { - if (cache_bin_nonfast_aligned(ptr)) { - return true; - } else { - return false; - } - } - - return false; -} - -/* Returns whether or not the free attempt was successful. */ -JEMALLOC_ALWAYS_INLINE -bool free_fastpath(void *ptr, size_t size, bool size_hint) { - tsd_t *tsd = tsd_get(false); - /* The branch gets optimized away unless tsd_get_allocates(). */ - if (unlikely(tsd == NULL)) { - return false; - } - /* - * The tsd_fast() / initialized checks are folded into the branch - * testing (deallocated_after >= threshold) later in this function. - * The threshold will be set to 0 when !tsd_fast. - */ - assert(tsd_fast(tsd) || - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - - emap_alloc_ctx_t alloc_ctx; - if (!size_hint) { - bool err = emap_alloc_ctx_try_lookup_fast(tsd, - &arena_emap_global, ptr, &alloc_ctx); - - /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ false))) { - return false; - } - assert(alloc_ctx.szind != SC_NSIZES); - } else { - /* - * Check for both sizes that are too large, and for sampled / - * special aligned objects. The alignment check will also check - * for null ptr. - */ - if (unlikely(size > SC_LOOKUP_MAXCLASS || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ true))) { - return false; - } - alloc_ctx.szind = sz_size2index_lookup(size); - /* Max lookup class must be small. */ - assert(alloc_ctx.szind < SC_NBINS); - /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = true; - } - /* - * Currently the fastpath only handles small sizes. The branch on - * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking - * tcache szind upper limit (i.e. tcache_maxclass) as well. 
- */ - assert(alloc_ctx.slab); - - uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold); - - size_t usize = sz_index2size(alloc_ctx.szind); - uint64_t deallocated_after = deallocated + usize; - /* - * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. Note that this handles the uninitialized case - * as well (TSD init will be triggered on the non-fastpath). Therefore - * anything depends on a functional TSD (e.g. the alloc_ctx sanity check - * below) needs to be after this branch. - */ - if (unlikely(deallocated_after >= threshold)) { - return false; - } - assert(tsd_fast(tsd)); - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } - - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ false); - cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; - - /* - * If junking were enabled, this is where we would do it. It's not - * though, since we ensured above that we're on the fast path. Assert - * that to double-check. 
- */ - assert(!opt_junk_free); - - if (!cache_bin_dalloc_easy(bin, ptr)) { - return false; - } - - *tsd_thread_deallocatedp_get(tsd) = deallocated_after; - - return true; -} - JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { LOG("core.free.entry", "ptr: %p", ptr); - if (!free_fastpath(ptr, 0, false)) { - free_default(ptr); - } + je_free_impl(ptr); LOG("core.free.exit", ""); } @@ -4000,21 +3807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); - if (flags != 0 || !free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, flags); - } - - LOG("core.sdallocx.exit", ""); -} - -void JEMALLOC_NOTHROW -je_sdallocx_noflags(void *ptr, size_t size) { - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, - size); - - if (!free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, 0); - } + je_sdallocx_impl(ptr, size, flags); LOG("core.sdallocx.exit", ""); } diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 8b53a392..e39615bc 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -173,21 +173,21 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_ void operator delete(void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } #if __cpp_sized_deallocation >= 201309 @@ -224,27 +224,27 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); + je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment)); } void operator delete(void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void 
operator delete[](void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 65f41dea..3d23403b 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -4,7 +4,7 @@ static void malloc_free(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -18,7 +18,7 @@ new_delete(void) { static void malloc_free_array(void) { void *p = malloc(sizeof(int)*8); - expect_ptr_not_null(p, "Unexpected new[] failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -40,7 +40,7 @@ new_sized_delete(void) { static void malloc_sdallocx(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); sdallocx(p, 1, 0); } #endif @@ -79,5 +79,4 @@ main() { test_free_vs_delete, test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx); - } From a74acb57e87e2c3ad4386f757f4d792d9aa6e19a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 1 Dec 2022 17:31:08 -0800 Subject: [PATCH 035/395] Fix dividing 0 error in stress/cpp/microbench Summary: Per issue #2356, some CXX compilers may optimize away the new/delete operation in stress/cpp/microbench.cpp. Thus, this commit (1) bumps the time interval to 1 if it is 0, and (2) modifies the pointers in the microbench to volatile. 
--- test/src/timer.c | 11 +++++++++++ test/stress/cpp/microbench.cpp | 36 +++++++++++++++++----------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/test/src/timer.c b/test/src/timer.c index 6e8b8edb..0f39d5f6 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -28,6 +28,17 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { size_t i = 0; size_t j, n; + /* + * The time difference could be 0 if the two clock readings are + * identical, either due to the operations being measured in the middle + * took very little time (or even got optimized away), or the clock + * readings are bad / very coarse grained clock. + * Thus, bump t1 if it is 0 to avoid dividing 0. + */ + if (t1 == 0) { + t1 = 1; + } + /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); i += n; diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 3d23403b..ab41b65d 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,45 +3,45 @@ static void malloc_free(void) { - void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected malloc failure"); - free(p); + void* volatile p = malloc(1); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + free((void *)p); } static void new_delete(void) { - auto p = ::operator new(1); - expect_ptr_not_null(p, "Unexpected new failure"); - ::operator delete(p); + void* volatile p = ::operator new(1); + expect_ptr_not_null((void *)p, "Unexpected new failure"); + ::operator delete((void *)p); } static void malloc_free_array(void) { - void *p = malloc(sizeof(int)*8); - expect_ptr_not_null(p, "Unexpected malloc failure"); - free(p); + void* volatile p = malloc(sizeof(int)*8); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + free((void *)p); } static void new_delete_array(void) { - auto p = new int[8]; - expect_ptr_not_null(p, "Unexpected new[] failure"); - delete[] p; + int* volatile p = new int[8]; + expect_ptr_not_null((int 
*)p, "Unexpected new[] failure"); + delete[] (int *)p; } #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - auto p = ::operator new(1); - expect_ptr_not_null(p, "Unexpected new failure"); - ::operator delete(p, 1); + void* volatile p = ::operator new(1); + expect_ptr_not_null((void *)p, "Unexpected new failure"); + ::operator delete((void *)p, 1); } static void malloc_sdallocx(void) { - void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected malloc failure"); - sdallocx(p, 1, 0); + void* volatile p = malloc(1); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + sdallocx((void *)p, 1, 0); } #endif From 8580c65f81c5252e493da656a448ec3a8571dab7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 Nov 2022 15:17:16 -0700 Subject: [PATCH 036/395] Implement prof sample hooks "experimental.hooks.prof_sample(_free)". The added hooks hooks.prof_sample and hooks.prof_sample_free are intended to allow advanced users to track additional information, to enable new ways of profiling on top of the jemalloc heap profile and sample features. The sample hook is invoked after the allocation and backtracing, and forwards both the allocation and backtrace to the user hook; the sample_free hook happens before the actual deallocation, and forwards only the ptr and usz to the hook. 
--- include/jemalloc/internal/prof_externs.h | 9 +- include/jemalloc/internal/prof_hook.h | 6 + include/jemalloc/internal/prof_inlines.h | 5 +- src/ctl.c | 60 +++++++ src/prof.c | 50 +++++- src/prof_sys.c | 2 + test/unit/prof_hook.c | 194 +++++++++++++++++++++-- 7 files changed, 307 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d1101561..412378a2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -56,6 +56,12 @@ prof_backtrace_hook_t prof_backtrace_hook_get(); void prof_dump_hook_set(prof_dump_hook_t hook); prof_dump_hook_t prof_dump_hook_get(); +void prof_sample_hook_set(prof_sample_hook_t hook); +prof_sample_hook_t prof_sample_hook_get(); + +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +prof_sample_free_hook_t prof_sample_free_hook_get(); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); @@ -63,7 +69,8 @@ prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); +void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 150d19d3..8615dc53 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -18,4 +18,10 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); */ typedef void (*prof_dump_hook_t)(const char *filename); +/* ptr, size, 
backtrace vector, backtrace vector length */ +typedef void (*prof_sample_hook_t)(const void *, size_t, void **, unsigned); + +/* ptr, size */ +typedef void (*prof_sample_free_hook_t)(const void *, size_t); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 7d9608b5..ab3e01f6 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -213,7 +213,8 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_usize, old_prof_info); + prof_free_sampled_object(tsd, old_ptr, old_usize, + old_prof_info); } } @@ -250,7 +251,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { assert(prof_sample_aligned(ptr)); - prof_free_sampled_object(tsd, usize, &prof_info); + prof_free_sampled_object(tsd, ptr, usize, &prof_info); } } diff --git a/src/ctl.c b/src/ctl.c index acf5d366..eafbdc61 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -315,6 +315,8 @@ CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) +CTL_PROTO(experimental_hooks_prof_sample) +CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -858,6 +860,8 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)}, {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, + {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; 
@@ -3505,6 +3509,62 @@ label_return: return ret; } +static int +experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_sample_hook_t old_hook = + prof_sample_hook_get(); + READ(old_hook, prof_sample_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_sample_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_sample_hook_t); + prof_sample_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + +static int +experimental_hooks_prof_sample_free_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_sample_free_hook_t old_hook = + prof_sample_free_hook_get(); + READ(old_hook, prof_sample_free_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_sample_free_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_sample_free_hook_t); + prof_sample_free_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /* For integration test purpose only. No plan to move out of experimental. */ static int experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, diff --git a/src/prof.c b/src/prof.c index 3deac0b5..91425371 100644 --- a/src/prof.c +++ b/src/prof.c @@ -78,6 +78,12 @@ atomic_p_t prof_backtrace_hook; /* Logically a prof_dump_hook_t. */ atomic_p_t prof_dump_hook; +/* Logically a prof_sample_hook_t. */ +atomic_p_t prof_sample_hook; + +/* Logically a prof_sample_free_hook_t. 
*/ +atomic_p_t prof_sample_free_hook; + /******************************************************************************/ void @@ -145,10 +151,20 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, if (opt_prof_stats) { prof_stats_inc(tsd, szind, size); } + + /* Sample hook. */ + prof_sample_hook_t prof_sample_hook = prof_sample_hook_get(); + if (prof_sample_hook != NULL) { + prof_bt_t *bt = &tctx->gctx->bt; + pre_reentrancy(tsd, NULL); + prof_sample_hook(ptr, size, bt->vec, bt->len); + post_reentrancy(tsd); + } } void -prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_info_t *prof_info) { cassert(config_prof); assert(prof_info != NULL); @@ -156,6 +172,16 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { assert((uintptr_t)tctx > (uintptr_t)1U); szind_t szind = sz_size2index(usize); + + /* Unsample hook. */ + prof_sample_free_hook_t prof_sample_free_hook = + prof_sample_free_hook_get(); + if (prof_sample_free_hook != NULL) { + pre_reentrancy(tsd, NULL); + prof_sample_free_hook(ptr, usize); + post_reentrancy(tsd); + } + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); @@ -549,6 +575,28 @@ prof_dump_hook_get() { ATOMIC_ACQUIRE); } +void +prof_sample_hook_set(prof_sample_hook_t hook) { + atomic_store_p(&prof_sample_hook, hook, ATOMIC_RELEASE); +} + +prof_sample_hook_t +prof_sample_hook_get() { + return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, + ATOMIC_ACQUIRE); +} + +void +prof_sample_free_hook_set(prof_sample_free_hook_t hook) { + atomic_store_p(&prof_sample_free_hook, hook, ATOMIC_RELEASE); +} + +prof_sample_free_hook_t +prof_sample_free_hook_get() { + return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, + ATOMIC_ACQUIRE); +} + void prof_boot0(void) { cassert(config_prof); diff --git a/src/prof_sys.c b/src/prof_sys.c index 99fa3a77..d2487fd6 100644 --- 
a/src/prof_sys.c +++ b/src/prof_sys.c @@ -431,6 +431,8 @@ void prof_hooks_init() { prof_backtrace_hook_set(&prof_backtrace_impl); prof_dump_hook_set(NULL); + prof_sample_hook_set(NULL); + prof_sample_free_hook_set(NULL); } void diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index fc06d84e..a48b237b 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -1,11 +1,23 @@ #include "test/jemalloc_test.h" +/* + * The MALLOC_CONF of this test has lg_prof_sample:0, meaning that every single + * allocation will be sampled (and trigger relevant hooks). + */ + const char *dump_filename = "/dev/null"; -prof_backtrace_hook_t default_hook; +prof_backtrace_hook_t default_bt_hook; bool mock_bt_hook_called = false; bool mock_dump_hook_called = false; +bool mock_prof_sample_hook_called = false; +bool mock_prof_sample_free_hook_called = false; + +void *sampled_ptr = NULL; +size_t sampled_ptr_sz = 0; +void *free_sampled_ptr = NULL; +size_t free_sampled_ptr_sz = 0; void mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { @@ -18,7 +30,7 @@ mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { void mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { - default_hook(vec, len, max_len); + default_bt_hook(vec, len, max_len); expect_u_gt(*len, 0, "Default backtrace hook returned empty backtrace"); expect_u_lt(*len, max_len, "Default backtrace hook returned too large backtrace"); @@ -47,6 +59,24 @@ mock_dump_hook(const char *filename) { "Incorrect file name passed to the dump hook"); } +void +mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len) { + mock_prof_sample_hook_called = true; + sampled_ptr = (void *)ptr; + sampled_ptr_sz = sz; + for (unsigned i = 0; i < len; i++) { + expect_ptr_not_null((void **)vec[i], + "Backtrace should not contain NULL"); + } +} + +void +mock_prof_sample_free_hook(const void *ptr, size_t sz) { + mock_prof_sample_free_hook_called = true; + free_sampled_ptr = (void *)ptr; + 
free_sampled_ptr_sz = sz; +} + TEST_BEGIN(test_prof_backtrace_hook_replace) { test_skip_if(!config_prof); @@ -63,10 +93,10 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { NULL, 0, (void *)&null_hook, sizeof(null_hook)), EINVAL, "Incorrectly allowed NULL backtrace hook"); - size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); @@ -77,8 +107,8 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { prof_backtrace_hook_t current_hook; size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -100,10 +130,10 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { expect_false(mock_bt_hook_called, "Called mock hook before it's set"); - size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); @@ -114,8 +144,8 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { prof_backtrace_hook_t current_hook; size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, 
¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -138,10 +168,10 @@ TEST_BEGIN(test_prof_dump_hook) { expect_false(mock_dump_hook_called, "Called dump hook before it's set"); - size_t default_hook_sz = sizeof(prof_dump_hook_t); + size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); prof_dump_hook_t hook = &mock_dump_hook; expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, @@ -152,8 +182,8 @@ TEST_BEGIN(test_prof_dump_hook) { prof_dump_hook_t current_hook; size_t current_hook_sz = sizeof(prof_dump_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -161,10 +191,144 @@ TEST_BEGIN(test_prof_dump_hook) { } TEST_END +/* Need the do_write flag because NULL is a valid to_write value. */ +static void +read_write_prof_sample_hook(prof_sample_hook_t *to_read, bool do_write, + prof_sample_hook_t to_write) { + size_t hook_sz = sizeof(prof_sample_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_sample", + (void *)to_read, &hook_sz, do_write ? 
&to_write : NULL, hook_sz), 0, + "Unexpected prof_sample_hook mallctl failure"); +} + +static void +write_prof_sample_hook(prof_sample_hook_t new_hook) { + read_write_prof_sample_hook(NULL, true, new_hook); +} + +static prof_sample_hook_t +read_prof_sample_hook(void) { + prof_sample_hook_t curr_hook; + read_write_prof_sample_hook(&curr_hook, false, NULL); + + return curr_hook; +} + +static void +read_write_prof_sample_free_hook(prof_sample_free_hook_t *to_read, + bool do_write, prof_sample_free_hook_t to_write) { + size_t hook_sz = sizeof(prof_sample_free_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_sample_free", + (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, + "Unexpected prof_sample_free_hook mallctl failure"); +} + +static void +write_prof_sample_free_hook(prof_sample_free_hook_t new_hook) { + read_write_prof_sample_free_hook(NULL, true, new_hook); +} + +static prof_sample_free_hook_t +read_prof_sample_free_hook(void) { + prof_sample_free_hook_t curr_hook; + read_write_prof_sample_free_hook(&curr_hook, false, NULL); + + return curr_hook; +} + +static void +check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { + expect_false(mock_prof_sample_hook_called, + "Should not have called prof_sample hook"); + expect_false(mock_prof_sample_free_hook_called, + "Should not have called prof_sample_free hook"); + expect_ptr_null(sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); + expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); + expect_zu_eq(free_sampled_ptr_sz, 0, + "Unexpected free sampled ptr size"); + + prof_sample_hook_t curr_hook = read_prof_sample_hook(); + expect_ptr_eq(curr_hook, sample_hook_set ? mock_prof_sample_hook : NULL, + "Unexpected non NULL default hook"); + + prof_sample_free_hook_t curr_free_hook = read_prof_sample_free_hook(); + expect_ptr_eq(curr_free_hook, sample_free_hook_set ? 
+ mock_prof_sample_free_hook : NULL, + "Unexpected non NULL default hook"); + + size_t alloc_sz = 10; + void *p = mallocx(alloc_sz, 0); + expect_ptr_not_null(p, "Failed to allocate"); + expect_true(mock_prof_sample_hook_called == sample_hook_set, + "Incorrect prof_sample hook usage"); + if (sample_hook_set) { + expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(alloc_sz, sampled_ptr_sz, + "Unexpected sampled usize"); + } + + dallocx(p, 0); + expect_true(mock_prof_sample_free_hook_called == sample_free_hook_set, + "Incorrect prof_sample_free hook usage"); + if (sample_free_hook_set) { + size_t usz = sz_s2u(alloc_sz); + expect_ptr_eq(p, free_sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(usz, free_sampled_ptr_sz, "Unexpected sampled usize"); + } + + sampled_ptr = free_sampled_ptr = NULL; + sampled_ptr_sz = free_sampled_ptr_sz = 0; + mock_prof_sample_hook_called = false; + mock_prof_sample_free_hook_called = false; +} + +TEST_BEGIN(test_prof_sample_hooks) { + test_skip_if(!config_prof); + + check_prof_sample_hooks(false, false); + + write_prof_sample_hook(mock_prof_sample_hook); + check_prof_sample_hooks(true, false); + + write_prof_sample_free_hook(mock_prof_sample_free_hook); + check_prof_sample_hooks(true, true); + + write_prof_sample_hook(NULL); + check_prof_sample_hooks(false, true); + + write_prof_sample_free_hook(NULL); + check_prof_sample_hooks(false, false); + + /* Test read+write together. 
*/ + prof_sample_hook_t sample_hook; + read_write_prof_sample_hook(&sample_hook, true, mock_prof_sample_hook); + expect_ptr_null(sample_hook, "Unexpected non NULL default hook"); + check_prof_sample_hooks(true, false); + + prof_sample_free_hook_t sample_free_hook; + read_write_prof_sample_free_hook(&sample_free_hook, true, + mock_prof_sample_free_hook); + expect_ptr_null(sample_free_hook, "Unexpected non NULL default hook"); + check_prof_sample_hooks(true, true); + + read_write_prof_sample_hook(&sample_hook, true, NULL); + expect_ptr_eq(sample_hook, mock_prof_sample_hook, + "Unexpected prof_sample hook"); + check_prof_sample_hooks(false, true); + + read_write_prof_sample_free_hook(&sample_free_hook, true, NULL); + expect_ptr_eq(sample_free_hook, mock_prof_sample_free_hook, + "Unexpected prof_sample_free hook"); + check_prof_sample_hooks(false, false); +} +TEST_END + int main(void) { return test( test_prof_backtrace_hook_replace, test_prof_backtrace_hook_augment, - test_prof_dump_hook); + test_prof_dump_hook, + test_prof_sample_hooks); } From 5fd55837bbc400d8cc15152ac2b80b64baa9b68c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Feb 2023 15:28:22 -0800 Subject: [PATCH 037/395] Fix thread_name updating for heap profiling. The current thread name reading path updates the name every time, which requires both alloc and dalloc -- and the temporary NULL value in the middle causes races where the prof dump read path gets NULLed in the middle. Minimize the changes in this commit to isolate the bugfix testing; will also refactor the whole thread name paths later. 
--- src/prof_data.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index f8b19594..56d3dc88 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -451,16 +451,15 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { } size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { return NULL; } + memcpy(ret, thread_name, size); + ret[size - 1] = '\0'; + return ret; } @@ -493,14 +492,14 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return EAGAIN; } - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; + char *old_thread_name = tdata->thread_name; + tdata->thread_name = s; + if (old_thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), old_thread_name, /* tcache */ NULL, + /* alloc_ctx */ NULL, /* is_internal */ true, + /* slow_path */ true); } + return 0; } From 97b313c7d480bc087b0c805b4bb42b71dd9c9e93 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Dec 2022 14:36:04 -0800 Subject: [PATCH 038/395] More conservative setting for /test/unit/background_thread_enable. Lower the thread and arena count to avoid resource exhaustion on 32-bit. 
--- test/unit/background_thread_enable.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 44034ac6..5f42feff 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20"; +const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:8"; static unsigned max_test_narenas(void) { @@ -12,12 +12,9 @@ max_test_narenas(void) { * approximation. */ unsigned ret = 10 * ncpus; - /* Limit the max to avoid VM exhaustion on 32-bit . */ - if (ret > 512) { - ret = 512; - } - return ret; + /* Limit the max to avoid VM exhaustion on 32-bit . */ + return ret > 256 ? 256 : ret; } TEST_BEGIN(test_deferred) { From b6125120ac22c2c7e7cd36df114a2b280dcc33e7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Feb 2023 17:43:12 -0800 Subject: [PATCH 039/395] Add an explicit name to the dedicated oversize arena. --- src/arena.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/arena.c b/src/arena.c index 25ab41af..970f60ed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1746,6 +1746,11 @@ arena_choose_huge(tsd_t *tsd) { if (huge_arena == NULL) { return NULL; } + + char *huge_arena_name = "auto_oversize"; + strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); + huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; + /* * Purge eagerly for huge allocations, because: 1) number of * huge allocations is usually small, which means ticker based From c7805f1eb5b9eadccb9711044e141ff741c09d4c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Feb 2023 17:28:58 -0800 Subject: [PATCH 040/395] Add a header in HPA stats for the nonfull slabs. 
--- src/stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 701a6c86..43360a2d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -910,8 +910,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" - " nretained: 0 huge, %zu nonhuge \n" - "\n", + " nretained: 0 huge, %zu nonhuge \n", npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ndirty_huge, ndirty_nonhuge, @@ -932,6 +931,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ + /* Last, nonfull slab stats. */ COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) @@ -947,6 +947,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { stats_arenas_mib[2] = i; CTL_LEAF_PREPARE(stats_arenas_mib, 3, "hpa_shard.nonfull_slabs"); + emitter_table_printf(emitter, " In nonfull slabs:\n"); emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; From 4422f88d17404944a312825a1aec96cd9dc6c165 Mon Sep 17 00:00:00 2001 From: barracuda156 Date: Sat, 15 Oct 2022 18:04:24 +0800 Subject: [PATCH 041/395] Makefile.in: link with g++ when cxx enabled --- Makefile.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.in b/Makefile.in index 195084d6..450abeb4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -522,7 +522,11 @@ endif $(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS)) $(if $(PIC_CFLAGS),$(CPP_PIC_OBJS),$(CPP_OBJS)) @mkdir -p $(@D) +ifeq (@enable_cxx@, 1) + $(CXX) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) +else $(CC) $(DSO_LDFLAGS) $(call 
RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) +endif $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS) $(CPP_OBJS) From e8b28908dede2a27530dbaa255af6cbcf579fc31 Mon Sep 17 00:00:00 2001 From: Fernando Pelliccioni Date: Fri, 23 Sep 2022 11:34:05 -0300 Subject: [PATCH 042/395] [MSVC] support for Visual Studio 2019 and 2022 --- msvc/jemalloc_vc2019.sln | 63 +++ msvc/jemalloc_vc2022.sln | 63 +++ .../projects/vc2019/jemalloc/jemalloc.vcxproj | 379 ++++++++++++++++++ .../vc2019/jemalloc/jemalloc.vcxproj.filters | 197 +++++++++ .../vc2019/test_threads/test_threads.vcxproj | 326 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ .../projects/vc2022/jemalloc/jemalloc.vcxproj | 379 ++++++++++++++++++ .../vc2022/jemalloc/jemalloc.vcxproj.filters | 197 +++++++++ .../vc2022/test_threads/test_threads.vcxproj | 326 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ 10 files changed, 1982 insertions(+) create mode 100644 msvc/jemalloc_vc2019.sln create mode 100644 msvc/jemalloc_vc2022.sln create mode 100644 msvc/projects/vc2019/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2019/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters create mode 100644 msvc/projects/vc2022/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2022/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters diff --git a/msvc/jemalloc_vc2019.sln b/msvc/jemalloc_vc2019.sln new file mode 100644 index 00000000..871ea9d4 --- /dev/null +++ b/msvc/jemalloc_vc2019.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 
14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2019\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2019\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + 
{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/jemalloc_vc2022.sln b/msvc/jemalloc_vc2022.sln new file mode 100644 index 00000000..898574f1 --- /dev/null +++ b/msvc/jemalloc_vc2022.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio 
Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2022\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2022\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + 
{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj new file mode 
100644 index 00000000..66ba849d --- /dev/null +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -0,0 +1,379 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v142 + MultiByte + + + StaticLibrary + true + v142 + MultiByte + + + DynamicLibrary + false + v142 + true + MultiByte + + + StaticLibrary + false + v142 + true + MultiByte + + + DynamicLibrary + true + v142 + MultiByte + + + StaticLibrary + true + v142 + MultiByte + + + DynamicLibrary + false + v142 + true + MultiByte + + + StaticLibrary + false + v142 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + 
_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 
4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..1b43e9f2 --- /dev/null +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,197 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No 
newline at end of file diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj b/msvc/projects/vc2019/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..8471a41e --- /dev/null +++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v142 + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + true + v142 + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + false + v142 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters new 
file mode 100644 index 00000000..fa4588fd --- /dev/null +++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..7d9a1aa0 --- /dev/null +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -0,0 +1,379 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v143 + MultiByte + + + StaticLibrary + true + v143 + MultiByte + + + DynamicLibrary + false + v143 + true + MultiByte + + + StaticLibrary + false + v143 + true + MultiByte + + + DynamicLibrary + true + v143 + MultiByte + + + StaticLibrary + true + v143 + MultiByte + + + DynamicLibrary + false + v143 + true + MultiByte + + + StaticLibrary + false + v143 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + 
_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..1b43e9f2 --- /dev/null +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,197 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + 
+ Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj b/msvc/projects/vc2022/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..471f693b --- /dev/null +++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v143 + MultiByte + + + Application + true + v143 + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + true + v143 + MultiByte + + + Application + true + v143 + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + false + v143 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + 
jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 
MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..fa4588fd --- /dev/null +++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file From 09e4b38fb1f9a9b505e35ac13b8f99282990bc2c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 15 Dec 2022 10:54:33 -0800 Subject: [PATCH 043/395] Use asm volatile during benchmarks. 
--- configure.ac | 16 +++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ test/include/test/bench.h | 11 ++++++++++ test/stress/cpp/microbench.cpp | 20 ++++++++++++------- test/stress/large_microbench.c | 2 ++ test/stress/microbench.c | 6 ++++++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 846a049c..fbc6298b 100644 --- a/configure.ac +++ b/configure.ac @@ -546,6 +546,22 @@ typedef unsigned __int32 uint32_t; ;; esac AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) +AC_CACHE_CHECK([asm volatile support], + [je_cv_asm_volatile], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +]], +[[ + void* ptr; + asm volatile("" : "+r"(ptr)); + return 0; +]])], +[je_cv_asm_volatile=yes], +[je_cv_asm_volatile=no], +[je_cv_asm_volatile=no])) +if test "x${je_cv_asm_volatile}" = "xyes"; then + AC_DEFINE([JEMALLOC_HAVE_ASM_VOLATILE], [ ], [ ]) +fi LD_PRELOAD_VAR="LD_PRELOAD" so="so" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 55938433..41e40ccf 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -440,4 +440,7 @@ /* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */ #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE +/* If defined, use volatile asm during benchmarks. 
*/ +#undef JEMALLOC_HAVE_ASM_VOLATILE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 7421b4d2..29c6801f 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -58,3 +58,14 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, dallocx(p, 0); } + +static inline void * +no_opt_ptr(void *ptr) { +#ifdef JEMALLOC_HAVE_ASM_VOLATILE + asm volatile("" : "+r"(ptr)); +#else + void *volatile dup = ptr; + ptr = dup; +#endif + return ptr; +} diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index ab41b65d..203c3dc9 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,44 +3,50 @@ static void malloc_free(void) { - void* volatile p = malloc(1); + void* p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); free((void *)p); } static void new_delete(void) { - void* volatile p = ::operator new(1); + void* p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); + p = no_opt_ptr(p); ::operator delete((void *)p); } static void malloc_free_array(void) { - void* volatile p = malloc(sizeof(int)*8); + void* p = malloc(sizeof(int)*8); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); free((void *)p); } static void new_delete_array(void) { - int* volatile p = new int[8]; - expect_ptr_not_null((int *)p, "Unexpected new[] failure"); + int* p = new int[8]; + expect_ptr_not_null((void *)p, "Unexpected new[] failure"); + p = (int *)no_opt_ptr((void *)p); delete[] (int *)p; } #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - void* volatile p = ::operator new(1); + void* p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); + p = no_opt_ptr(p); ::operator delete((void *)p, 1); } static void malloc_sdallocx(void) { - void* volatile p = malloc(1); + void* p = malloc(1); 
expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); sdallocx((void *)p, 1, 0); } #endif diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c index c66b33a1..44a60c53 100644 --- a/test/stress/large_microbench.c +++ b/test/stress/large_microbench.c @@ -9,6 +9,7 @@ large_mallocx_free(void) { */ void *p = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "mallocx shouldn't fail"); + p = no_opt_ptr(p); free(p); } @@ -16,6 +17,7 @@ static void small_mallocx_free(void) { void *p = mallocx(16, 0); assert_ptr_not_null(p, "mallocx shouldn't fail"); + p = no_opt_ptr(p); free(p); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 062e32fd..89479b7e 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -9,6 +9,7 @@ malloc_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); free(p); } @@ -19,6 +20,7 @@ mallocx_free(void) { test_fail("Unexpected mallocx() failure"); return; } + p = no_opt_ptr(p); free(p); } @@ -35,6 +37,7 @@ malloc_dallocx(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); dallocx(p, 0); } @@ -45,6 +48,7 @@ malloc_sdallocx(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); sdallocx(p, 1, 0); } @@ -82,6 +86,7 @@ malloc_sallocx_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); if (sallocx(p, 0) < 1) { test_fail("Unexpected sallocx() failure"); } @@ -103,6 +108,7 @@ malloc_nallocx_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); if (nallocx(1, 0) < 1) { test_fail("Unexpected nallocx() failure"); } From 4edea8eb8e879bf4d89a3ed418bf90bb8e09d93b Mon Sep 17 00:00:00 2001 From: Chris Seymour Date: Sat, 25 Feb 2023 14:52:22 +0000 Subject: [PATCH 044/395] switch to https --- README | 2 +- TUNING.md | 26 +++++++++++++------------- jemalloc.pc.in | 2 +- 3 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/README b/README index 3a6e0d27..d33a69ce 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://jemalloc.net/ +URL: https://jemalloc.net/ diff --git a/TUNING.md b/TUNING.md index e96399d7..1f6bef35 100644 --- a/TUNING.md +++ b/TUNING.md @@ -11,9 +11,9 @@ by a few percent, or make favorable trade-offs. ## Notable runtime options for performance tuning Runtime options can be set via -[malloc_conf](http://jemalloc.net/jemalloc.3.html#tuning). +[malloc_conf](https://jemalloc.net/jemalloc.3.html#tuning). -* [background_thread](http://jemalloc.net/jemalloc.3.html#background_thread) +* [background_thread](https://jemalloc.net/jemalloc.3.html#background_thread) Enabling jemalloc background threads generally improves the tail latency for application threads, since unused memory purging is shifted to the dedicated @@ -23,7 +23,7 @@ Runtime options can be set via Suggested: `background_thread:true` when jemalloc managed threads can be allowed. -* [metadata_thp](http://jemalloc.net/jemalloc.3.html#opt.metadata_thp) +* [metadata_thp](https://jemalloc.net/jemalloc.3.html#opt.metadata_thp) Allowing jemalloc to utilize transparent huge pages for its internal metadata usually reduces TLB misses significantly, especially for programs @@ -35,8 +35,8 @@ Runtime options can be set via `metadata_thp:always`, which is expected to improve CPU utilization at a small memory cost. 
-* [dirty_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and - [muzzy_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) +* [dirty_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and + [muzzy_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) Decay time determines how fast jemalloc returns unused pages back to the operating system, and therefore provides a fairly straightforward trade-off @@ -46,7 +46,7 @@ Runtime options can be set via Suggested: tune the values based on the desired trade-offs. -* [narenas](http://jemalloc.net/jemalloc.3.html#opt.narenas) +* [narenas](https://jemalloc.net/jemalloc.3.html#opt.narenas) By default jemalloc uses multiple arenas to reduce internal lock contention. However high arena count may also increase overall memory fragmentation, @@ -57,7 +57,7 @@ Runtime options can be set via Suggested: if low parallelism is expected, try lower arena count while monitoring CPU and memory usage. -* [percpu_arena](http://jemalloc.net/jemalloc.3.html#opt.percpu_arena) +* [percpu_arena](https://jemalloc.net/jemalloc.3.html#opt.percpu_arena) Enable dynamic thread to arena association based on running CPU. This has the potential to improve locality, e.g. when thread to CPU affinity is @@ -100,28 +100,28 @@ aborts immediately on illegal options. In addition to the runtime options, there are a number of programmatic ways to improve application performance with jemalloc. -* [Explicit arenas](http://jemalloc.net/jemalloc.3.html#arenas.create) +* [Explicit arenas](https://jemalloc.net/jemalloc.3.html#arenas.create) Manually created arenas can help performance in various ways, e.g. by managing locality and contention for specific usages. 
For example, applications can explicitly allocate frequently accessed objects from a dedicated arena with - [mallocx()](http://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve + [mallocx()](https://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve locality. In addition, explicit arenas often benefit from individually tuned options, e.g. relaxed [decay - time](http://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if + time](https://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if frequent reuse is expected. -* [Extent hooks](http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) +* [Extent hooks](https://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) Extent hooks allow customization for managing underlying memory. One use case for performance purpose is to utilize huge pages -- for example, - [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) + [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) uses explicit arenas with customized extent hooks to manage 1GB huge pages for frequently accessed data, which reduces TLB misses significantly. * [Explicit thread-to-arena - binding](http://jemalloc.net/jemalloc.3.html#thread.arena) +   binding](https://jemalloc.net/jemalloc.3.html#thread.arena) It is common for some threads in an application to have different memory access / allocation patterns. Threads with heavy workloads often benefit diff --git a/jemalloc.pc.in b/jemalloc.pc.in index c428a86d..0a377152 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
-URL: http://jemalloc.net/ +URL: https://jemalloc.net/ Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} From f743690739299cb1e72852744bdd79443b264be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 10 Mar 2023 09:12:15 +0000 Subject: [PATCH 045/395] Remove unused mutex from hpa_central --- include/jemalloc/internal/hpa.h | 5 ----- src/hpa.c | 6 +----- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index f3562853..0b3c76c6 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -9,11 +9,6 @@ typedef struct hpa_central_s hpa_central_t; struct hpa_central_s { - /* - * The mutex guarding most of the operations on the central data - * structure. - */ - malloc_mutex_t mtx; /* * Guards expansion of eden. We separate this from the regular mutex so * that cheaper operations can still continue while we're doing the OS diff --git a/src/hpa.c b/src/hpa.c index 7e2aeba0..8ebb2db2 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -68,11 +68,7 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) if (err) { return true; } - err = malloc_mutex_init(¢ral->mtx, "hpa_central", - WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive); - if (err) { - return true; - } + central->base = base; central->eden = NULL; central->eden_len = 0; From 71bc1a3d91ae7e513488401627eca2a31e9f6e60 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Mar 2023 13:15:59 -0800 Subject: [PATCH 046/395] Avoid assuming the arena id in test when percpu_arena is used. 
--- test/unit/huge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/huge.c b/test/unit/huge.c index ec64e500..53f6577b 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -82,6 +82,9 @@ TEST_BEGIN(huge_allocation) { expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); + test_skip_if(have_percpu_arena && + PERCPU_ARENA_ENABLED(opt_percpu_arena)); + ptr = mallocx(HUGE_SZ >> 1, 0); expect_ptr_not_null(ptr, "Fail to allocate half huge size"); expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, From d503d72129eddb2175d5d5119c9b70d507112947 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 9 Mar 2023 11:26:07 -0800 Subject: [PATCH 047/395] Add the missing descriptions in AC_DEFINE --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index fbc6298b..2bbf7d54 100644 --- a/configure.ac +++ b/configure.ac @@ -1088,7 +1088,7 @@ AC_SUBST([JEMALLOC_CPREFIX]) AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[], [ ]) + AC_DEFINE([JEMALLOC_EXPORT], [ ], [ ]) fi] ) @@ -1650,7 +1650,7 @@ fi [enable_uaf_detection="0"] ) if test "x$enable_uaf_detection" = "x1" ; then - AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ]) + AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ], [ ]) fi AC_SUBST([enable_uaf_detection]) From aba1645f2d65a3b5c46958d7642b46ab3c142cf3 Mon Sep 17 00:00:00 2001 From: Marvin Schmidt Date: Tue, 27 Sep 2022 07:03:14 +0200 Subject: [PATCH 048/395] configure: Handle *-linux-musl* hosts properly This is the same as the `*-*-linux*` case with the two exceptions that we don't set glibc=1 and don't define JEMALLOC_USE_CXX_THROW --- configure.ac | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/configure.ac b/configure.ac index 2bbf7d54..f38b72d6 100644 --- a/configure.ac +++ b/configure.ac @@ -723,6 +723,19 @@ case "${host}" in fi 
zero_realloc_default_free="1" ;; + *-*-linux-musl*) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) + abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + if test "${LG_SIZEOF_PTR}" = "3"; then + default_retain="1" + fi + zero_realloc_default_free="1" + ;; *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) From 45249cf5a9cfa13c2c62e68e272a391721523b4b Mon Sep 17 00:00:00 2001 From: Marvin Schmidt Date: Tue, 27 Sep 2022 07:00:13 +0200 Subject: [PATCH 049/395] Fix exception specification error for hosts using musl libc It turns out that the previous commit did not suffice since the JEMALLOC_SYS_NOTHROW definition also causes the same exception specification errors as JEMALLOC_USE_CXX_THROW did: ``` x86_64-pc-linux-musl-cc -std=gnu11 -Werror=unknown-warning-option -Wall -Wextra -Wshorten-64-to-32 -Wsign-compare -Wundef -Wno-format-zero-length -Wpointer- arith -Wno-missing-braces -Wno-missing-field-initializers -pipe -g3 -fvisibility=hidden -Wimplicit-fallthrough -O3 -funroll-loops -march=native -O2 -pipe -c -march=native -O2 -pipe -D_GNU_SOURCE -D_REENTRANT -Iinclude -Iinclude -o src/background_thread.o src/background_thread.c In file included from src/jemalloc_cpp.cpp:9: In file included from include/jemalloc/internal/jemalloc_preamble.h:27: include/jemalloc/internal/../jemalloc.h:254:32: error: exception specification in declaration does not match previous declaration void JEMALLOC_SYS_NOTHROW *je_malloc(size_t size) ^ include/jemalloc/internal/../jemalloc.h:75:21: note: expanded from macro 'je_malloc' ^ /usr/x86_64-pc-linux-musl/include/stdlib.h:40:7: note: previous declaration is here void *malloc (size_t); ^ ``` On systems using the musl C library we have to omit the 
exception specification on malloc function family like it's done for MacOS, FreeBSD and OpenBSD. --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 2de3f27d..05d996be 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__linux__) && !defined(__GLIBC__))) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW From 8e7353a19b5fd9dd1041307b884bc969065b63af Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Mar 2023 14:02:30 -0700 Subject: [PATCH 050/395] Explicit arena assignment in test_thread_idle. Otherwise the associated arena could change with percpu arena enabled. --- test/unit/mallctl.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 244d4c96..1ff8b564 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1097,8 +1097,12 @@ TEST_BEGIN(test_thread_idle) { unsigned arena_ind; sz = sizeof(arena_ind); - err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); - expect_d_eq(err, 0, ""); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + err = mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); + err = mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); /* We're going to do an allocation of size 1, which we know is small. 
*/ size_t mib[5]; @@ -1108,10 +1112,11 @@ TEST_BEGIN(test_thread_idle) { mib[2] = arena_ind; /* - * This alloc and dalloc should leave something in the tcache, in a - * small size's cache bin. + * This alloc and dalloc should leave something (from the newly created + * arena) in the tcache, in a small size's cache bin. Later the stats + * of that arena will be checked to verify if tcache flush happened. */ - void *ptr = mallocx(1, 0); + void *ptr = mallocx(1, MALLOCX_TCACHE_NONE); dallocx(ptr, 0); uint64_t epoch; From 8b64be34414e92fcbcdbaf5b81db6d26289667b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Mar 2023 14:12:12 -0700 Subject: [PATCH 051/395] Explicit arena assignment in test_tcache_max. Otherwise the associated arena could change with percpu arena enabled. --- test/unit/tcache_max.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 1f657c85..b1093f40 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -157,6 +157,13 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(opt_prof); test_skip_if(san_uaf_detection_enabled()); + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, + sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { From 31e01a98f159926493158cde6453cde55f21c42b Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 14 Dec 2022 17:23:41 -0800 Subject: [PATCH 052/395] Fix the rdtscp detection bug and add prefix for the macro. 
--- configure.ac | 4 ++-- include/jemalloc/internal/jemalloc_internal_defs.h.in | 6 ++++++ include/jemalloc/internal/jemalloc_internal_inlines_a.h | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index f38b72d6..ec7a97cb 100644 --- a/configure.ac +++ b/configure.ac @@ -522,10 +522,10 @@ typedef unsigned __int32 uint32_t; return 0; ]])], [je_cv_rdtscp=yes], - [je_cv_rdstcp=no], + [je_cv_rdtscp=no], [je_cv_rdtscp=no])) if test "x${je_cv_rdtscp}" = "xyes"; then - AC_DEFINE([HAVE_RDTSCP], 1, []) + AC_DEFINE([JEMALLOC_HAVE_RDTSCP], [ ], [ ]) fi fi ;; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 41e40ccf..87845a48 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -443,4 +443,10 @@ /* If defined, use volatile asm during benchmarks. */ #undef JEMALLOC_HAVE_ASM_VOLATILE +/* + * If defined, support the use of rdtscp to get the time stamp counter + * and the processor ID. 
+ */ +#undef JEMALLOC_HAVE_RDTSCP + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 7686a9b7..cb6d78fa 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -14,7 +14,7 @@ malloc_getcpu(void) { return GetCurrentProcessorNumber(); #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); -#elif defined(HAVE_RDTSCP) +#elif defined(JEMALLOC_HAVE_RDTSCP) unsigned int ax, cx, dx; asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); return (malloc_cpuid_t)(dx & 0xfff); From 543e2d61e6047208d647cf3fd3499bead3bcc23e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Thu, 2 Mar 2023 23:32:42 +0000 Subject: [PATCH 053/395] Simplify the logic in ph_insert Also fixes what looks like an off by one error in the lazy aux list merge part of the code that previously never touched the last node in the aux list. --- include/jemalloc/internal/ph.h | 59 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 5f091c5f..8ceadb90 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -318,36 +318,37 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { */ if (ph->root == NULL) { ph->root = phn; - } else { - /* - * As a special case, check to see if we can replace the root. - * This is practically common in some important cases, and lets - * us defer some insertions (hopefully, until the point where - * some of the items in the aux list have been removed, savings - * us from linking them at all). 
- */ - if (cmp(phn, ph->root) < 0) { - phn_lchild_set(phn, ph->root, offset); - phn_prev_set(ph->root, phn, offset); - ph->root = phn; - ph->auxcount = 0; - return; - } - ph->auxcount++; - phn_next_set(phn, phn_next_get(ph->root, offset), offset); - if (phn_next_get(ph->root, offset) != NULL) { - phn_prev_set(phn_next_get(ph->root, offset), phn, - offset); - } - phn_prev_set(phn, ph->root, offset); - phn_next_set(ph->root, phn, offset); + return; } - if (ph->auxcount > 1) { - unsigned nmerges = ffs_zu(ph->auxcount - 1); - bool done = false; - for (unsigned i = 0; i < nmerges && !done; i++) { - done = ph_try_aux_merge_pair(ph, offset, cmp); - } + + /* + * As a special case, check to see if we can replace the root. + * This is practically common in some important cases, and lets + * us defer some insertions (hopefully, until the point where + * some of the items in the aux list have been removed, savings + * us from linking them at all). + */ + if (cmp(phn, ph->root) < 0) { + phn_lchild_set(phn, ph->root, offset); + phn_prev_set(ph->root, phn, offset); + ph->root = phn; + ph->auxcount = 0; + return; + } + + phn_next_set(phn, phn_next_get(ph->root, offset), offset); + if (phn_next_get(ph->root, offset) != NULL) { + phn_prev_set(phn_next_get(ph->root, offset), phn, + offset); + } + phn_prev_set(phn, ph->root, offset); + phn_next_set(ph->root, phn, offset); + + ph->auxcount++; + unsigned nmerges = ffs_zu(ph->auxcount); + bool done = false; + for (unsigned i = 0; i < nmerges && !done; i++) { + done = ph_try_aux_merge_pair(ph, offset, cmp); } } From be6da4f663a062353dd9a25baaae0ebcd68b7477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 Mar 2023 12:08:51 +0000 Subject: [PATCH 054/395] Do not maintain root->prev in ph_remove. 
--- include/jemalloc/internal/ph.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 8ceadb90..0cc41eab 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -380,9 +380,6 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { */ if (phn_lchild_get(phn, offset) == NULL) { ph->root = phn_next_get(phn, offset); - if (ph->root != NULL) { - phn_prev_set(ph->root, NULL, offset); - } return; } ph_merge_aux(ph, offset, cmp); From 5266152d7922fc76fdaaa39ded9381a4fa7b4b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 Mar 2023 12:35:45 +0000 Subject: [PATCH 055/395] Simplify the logic in ph_remove --- include/jemalloc/internal/ph.h | 64 +++++++++++----------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 0cc41eab..c3cf8743 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -369,9 +369,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { - void *replace; - void *parent; - if (ph->root == phn) { /* * We can delete from aux list without merging it, but we need @@ -389,50 +386,29 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } - /* Get parent (if phn is leftmost child) before mutating. */ - if ((parent = phn_prev_get(phn, offset)) != NULL) { - if (phn_lchild_get(parent, offset) != phn) { - parent = NULL; - } - } - /* Find a possible replacement node, and link to parent. */ - replace = ph_merge_children(phn, offset, cmp); - /* Set next/prev for sibling linked list. */ + void* prev = phn_prev_get(phn, offset); + void* next = phn_next_get(phn, offset); + + /* If we have children, then we integrate them back in the heap. 
*/ + void* replace = ph_merge_children(phn, offset, cmp); if (replace != NULL) { - if (parent != NULL) { - phn_prev_set(replace, parent, offset); - phn_lchild_set(parent, replace, offset); - } else { - phn_prev_set(replace, phn_prev_get(phn, offset), - offset); - if (phn_prev_get(phn, offset) != NULL) { - phn_next_set(phn_prev_get(phn, offset), replace, - offset); - } - } - phn_next_set(replace, phn_next_get(phn, offset), offset); - if (phn_next_get(phn, offset) != NULL) { - phn_prev_set(phn_next_get(phn, offset), replace, - offset); + phn_next_set(replace, next, offset); + if (next != NULL) { + phn_prev_set(next, replace, offset); } + + next = replace; + } + + if (next != NULL) { + phn_prev_set(next, prev, offset); + } + + assert(prev != NULL); + if (phn_lchild_get(prev, offset) == phn) { + phn_lchild_set(prev, next, offset); } else { - if (parent != NULL) { - void *next = phn_next_get(phn, offset); - phn_lchild_set(parent, next, offset); - if (next != NULL) { - phn_prev_set(next, parent, offset); - } - } else { - assert(phn_prev_get(phn, offset) != NULL); - phn_next_set( - phn_prev_get(phn, offset), - phn_next_get(phn, offset), offset); - } - if (phn_next_get(phn, offset) != NULL) { - phn_prev_set( - phn_next_get(phn, offset), - phn_prev_get(phn, offset), offset); - } + phn_next_set(prev, next, offset); } } From 6cab460a45411316426fb44bd476214d6af36d47 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 23:12:55 -0700 Subject: [PATCH 056/395] Add a multithreaded test for prof_sys_thread_name. Verified that this catches the issue being fixed in 5fd5583. 
--- test/unit/prof_sys_thread_name.c | 50 +++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index affc788a..3aeb8cf1 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/prof_sys.h" static const char *test_thread_name = "test_name"; +static const char *dump_filename = "/dev/null"; static int test_prof_sys_thread_name_read_error(char *buf, size_t limit) { @@ -25,6 +26,7 @@ test_prof_sys_thread_name_read_clear(char *buf, size_t limit) { TEST_BEGIN(test_prof_sys_thread_name) { test_skip_if(!config_prof); + test_skip_if(!opt_prof_sys_thread_name); bool oldval; size_t sz = sizeof(oldval); @@ -44,6 +46,8 @@ TEST_BEGIN(test_prof_sys_thread_name) { assert_ptr_eq(thread_name, test_thread_name, "Thread name should not be touched"); + prof_sys_thread_name_read_t *orig_prof_sys_thread_name_read = + prof_sys_thread_name_read; prof_sys_thread_name_read = test_prof_sys_thread_name_read_error; void *p = malloc(1); free(p); @@ -67,11 +71,55 @@ TEST_BEGIN(test_prof_sys_thread_name) { "mallctl read for thread name should not fail"); expect_str_eq(thread_name, "", "Thread name should be updated if the " "system call returns a different name"); + + prof_sys_thread_name_read = orig_prof_sys_thread_name_read; } TEST_END +#define ITER (16*1024) +static void * +thd_start(void *unused) { + /* Triggering samples which loads thread names. 
*/ + for (unsigned i = 0; i < ITER; i++) { + void *p = mallocx(4096, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); + } + + return NULL; +} + +TEST_BEGIN(test_prof_sys_thread_name_mt) { + test_skip_if(!config_prof); + test_skip_if(!opt_prof_sys_thread_name); + +#define NTHREADS 4 + thd_t thds[NTHREADS]; + unsigned thd_args[NTHREADS]; + unsigned i; + + for (i = 0; i < NTHREADS; i++) { + thd_args[i] = i; + thd_create(&thds[i], thd_start, (void *)&thd_args[i]); + } + /* Prof dump which reads the thread names. */ + for (i = 0; i < ITER; i++) { + expect_d_eq(mallctl("prof.dump", NULL, NULL, + (void *)&dump_filename, sizeof(dump_filename)), 0, + "Unexpected mallctl failure while dumping"); + } + + for (i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } +} +#undef NTHREADS +#undef ITER +TEST_END + int main(void) { return test( - test_prof_sys_thread_name); + test_prof_sys_thread_name, + test_prof_sys_thread_name_mt); } From ce0b7ab6c8d7a3579d012c227013f5143d9bc8c6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 18:02:34 -0700 Subject: [PATCH 057/395] Inline the storage for thread name in prof_tdata_t. The previous approach managed the thread name in a separate buffer, which causes races because the thread name update (triggered by new samples) can happen at the same time as prof dumping (which reads the thread names) -- these two operations are under separate locks to avoid blocking each other. Implemented the thread name storage as part of the tdata struct, which resolves the lifetime issue and also avoids internal alloc / dalloc during prof_sample. 
--- include/jemalloc/internal/prof_data.h | 1 - include/jemalloc/internal/prof_inlines.h | 34 ++++++++++++ include/jemalloc/internal/prof_structs.h | 6 +- include/jemalloc/internal/prof_types.h | 3 + src/ctl.c | 6 +- src/prof.c | 19 ++++--- src/prof_data.c | 70 ++++++------------------ src/prof_log.c | 3 +- src/prof_recent.c | 4 +- src/prof_sys.c | 15 +++-- test/unit/prof_thread_name.c | 62 ++++++++++++--------- 11 files changed, 120 insertions(+), 103 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 4c8e22c7..c4286b51 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -18,7 +18,6 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); void prof_unbias_map_init(); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index ab3e01f6..b74b115c 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -38,6 +38,22 @@ prof_gdump_get_unlocked(void) { return prof_gdump_val; } +JEMALLOC_ALWAYS_INLINE void +prof_thread_name_assert(prof_tdata_t *tdata) { + if (!config_debug) { + return; + } + prof_active_assert(); + + bool terminated = false; + for (unsigned i = 0; i < PROF_THREAD_NAME_MAX_LEN; i++) { + if (tdata->thread_name[i] == '\0') { + terminated = true; + } + } + assert(terminated); +} + JEMALLOC_ALWAYS_INLINE prof_tdata_t * prof_tdata_get(tsd_t *tsd, bool create) { prof_tdata_t *tdata; @@ -59,6 +75,10 @@ prof_tdata_get(tsd_t *tsd, bool create) { assert(tdata == NULL || tdata->attached); } + if (tdata != NULL) { + prof_thread_name_assert(tdata); + } + return tdata; } @@ -255,4 
+275,18 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, } } +JEMALLOC_ALWAYS_INLINE bool +prof_thread_name_empty(prof_tdata_t *tdata) { + prof_active_assert(); + + return (tdata->thread_name[0] == '\0'); +} + +JEMALLOC_ALWAYS_INLINE void +prof_thread_name_clear(prof_tdata_t *tdata) { + prof_active_assert(); + + tdata->thread_name[0] = '\0'; +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 9331fba4..da3cf8d5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -156,9 +156,6 @@ struct prof_tdata_s { */ uint64_t thr_discrim; - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - bool attached; bool expired; @@ -179,6 +176,9 @@ struct prof_tdata_s { */ ckh_t bt2tctx; + /* Included in heap profile dumps if has content. */ + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 87cbb4ab..104f7e61 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -77,4 +77,7 @@ typedef struct prof_recent_s prof_recent_t; /* Default number of recent allocations to record. */ #define PROF_RECENT_ALLOC_MAX_DEFAULT 0 +/* Thread name storage size limit. 
*/ +#define PROF_THREAD_NAME_MAX_LEN 16 + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index eafbdc61..cfd4ac6e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2384,13 +2384,13 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, READ_XOR_WRITE(); if (newp != NULL) { - if (newlen != sizeof(const char *)) { + const char *newval = *(const char **)newp; + if (newlen != sizeof(const char *) || newval == NULL) { ret = EINVAL; goto label_return; } - if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != - 0) { + if ((ret = prof_thread_name_set(tsd, newval)) != 0) { goto label_return; } } else { diff --git a/src/prof.c b/src/prof.c index 91425371..832aa528 100644 --- a/src/prof.c +++ b/src/prof.c @@ -415,11 +415,14 @@ prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; - char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; bool active = tdata->active; + /* Keep a local copy of the thread name, before detaching. */ + prof_thread_name_assert(tdata); + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + strncpy(thread_name, tdata->thread_name, PROF_THREAD_NAME_MAX_LEN); prof_tdata_detach(tsd, tdata); + return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, active); } @@ -464,15 +467,15 @@ prof_active_set(tsdn_t *tsdn, bool active) { const char * prof_thread_name_get(tsd_t *tsd) { + static const char *prof_thread_name_dummy = ""; + assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - - tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return ""; + return prof_thread_name_dummy; } - return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); + + return tdata->thread_name; } int diff --git a/src/prof_data.c b/src/prof_data.c index 56d3dc88..c33668ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -441,64 +441,30 @@ prof_bt_count(void) { return bt_count; } -char * -prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - - memcpy(ret, thread_name, size); - ret[size - 1] = '\0'; - - return ret; +static void +prof_thread_name_write_tdata(prof_tdata_t *tdata, const char *thread_name) { + strncpy(tdata->thread_name, thread_name, PROF_THREAD_NAME_MAX_LEN); + tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; } int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { assert(tsd_reentrancy_level_get(tsd) == 0); + assert(thread_name != NULL); - prof_tdata_t *tdata; - unsigned i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; - } - - /* Validate input. 
*/ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { + for (unsigned i = 0; thread_name[i] != '\0'; i++) { char c = thread_name[i]; if (!isgraph(c) && !isblank(c)) { - return EFAULT; + return EINVAL; } } - s = prof_thread_name_alloc(tsd, thread_name); - if (s == NULL) { - return EAGAIN; + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return ENOMEM; } - char *old_thread_name = tdata->thread_name; - tdata->thread_name = s; - if (old_thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), old_thread_name, /* tcache */ NULL, - /* alloc_ctx */ NULL, /* is_internal */ true, - /* slow_path */ true); - } + prof_thread_name_write_tdata(tdata, thread_name); return 0; } @@ -949,7 +915,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, tdata->thr_uid); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, &tdata->cnt_summed); - if (tdata->thread_name != NULL) { + if (!prof_thread_name_empty(tdata)) { arg->prof_dump_write(arg->cbopaque, " "); arg->prof_dump_write(arg->cbopaque, tdata->thread_name); } @@ -1179,10 +1145,15 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; tdata->attached = true; tdata->expired = false; tdata->tctx_uid_next = 0; + if (thread_name == NULL) { + prof_thread_name_clear(tdata); + } else { + prof_thread_name_write_tdata(tdata, thread_name); + } + prof_thread_name_assert(tdata); if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { @@ -1230,13 +1201,8 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); tdata_tree_remove(&tdatas, tdata); - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, 
NULL, NULL, true, - true); - } ckh_delete(tsd, &tdata->bt2tctx); idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); } diff --git a/src/prof_log.c b/src/prof_log.c index 0632c3b3..384d5e38 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -243,8 +243,7 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; + const char *prod_thr_name = tctx->tdata->thread_name; const char *cons_thr_name = prof_thread_name_get(tsd); prof_bt_t bt; diff --git a/src/prof_recent.c b/src/prof_recent.c index 834a9446..4c3c6296 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -495,7 +495,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { &node->alloc_tctx->thr_uid); prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; assert(alloc_tdata != NULL); - if (alloc_tdata->thread_name != NULL) { + if (!prof_thread_name_empty(alloc_tdata)) { emitter_json_kv(emitter, "alloc_thread_name", emitter_type_string, &alloc_tdata->thread_name); } @@ -511,7 +511,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_type_uint64, &node->dalloc_tctx->thr_uid); prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; assert(dalloc_tdata != NULL); - if (dalloc_tdata->thread_name != NULL) { + if (!prof_thread_name_empty(dalloc_tdata)) { emitter_json_kv(emitter, "dalloc_thread_name", emitter_type_string, &dalloc_tdata->thread_name); } diff --git a/src/prof_sys.c b/src/prof_sys.c index d2487fd6..3f7196f8 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -462,12 +462,17 @@ prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = void prof_sys_thread_name_fetch(tsd_t *tsd) { -#define THREAD_NAME_MAX_LEN 16 - char buf[THREAD_NAME_MAX_LEN]; - if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { - 
prof_thread_name_set_impl(tsd, buf); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return; } -#undef THREAD_NAME_MAX_LEN + + if (prof_sys_thread_name_read(tdata->thread_name, + PROF_THREAD_NAME_MAX_LEN) != 0) { + prof_thread_name_clear(tdata); + } + + tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; } int diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 3c4614fc..0fc29f75 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -14,8 +14,6 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, expect_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); } -#define mallctl_thread_name_get(a) \ - mallctl_thread_name_get_impl(a, __func__, __LINE__) static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, @@ -26,51 +24,59 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, func, line); mallctl_thread_name_get_impl(thread_name, func, line); } + +#define mallctl_thread_name_get(a) \ + mallctl_thread_name_get_impl(a, __func__, __LINE__) + #define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_thread_name_validation) { - const char *thread_name; - test_skip_if(!config_prof); test_skip_if(opt_prof_sys_thread_name); mallctl_thread_name_get(""); - mallctl_thread_name_set("hi there"); + + const char *test_name1 = "test case1"; + mallctl_thread_name_set(test_name1); + + /* Test name longer than the max len. 
*/ + char long_name[] = + "test case longer than expected; test case longer than expected"; + expect_zu_gt(strlen(long_name), PROF_THREAD_NAME_MAX_LEN, + "Long test name not long enough"); + const char *test_name_long = long_name; + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&test_name_long, sizeof(test_name_long)), 0, + "Unexpected mallctl failure from thread.prof.name"); + /* Long name cut to match. */ + long_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; + mallctl_thread_name_get(test_name_long); /* NULL input shouldn't be allowed. */ - thread_name = NULL; + const char *test_name2 = NULL; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, - "Unexpected mallctl result writing \"%s\" to thread.prof.name", - thread_name); + (void *)&test_name2, sizeof(test_name2)), EINVAL, + "Unexpected mallctl result writing to thread.prof.name"); /* '\n' shouldn't be allowed. */ - thread_name = "hi\nthere"; + const char *test_name3 = "test\ncase"; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, + (void *)&test_name3, sizeof(test_name3)), EINVAL, "Unexpected mallctl result writing \"%s\" to thread.prof.name", - thread_name); + test_name3); /* Simultaneous read/write shouldn't be allowed. 
*/ - { - const char *thread_name_old; - size_t sz; - - sz = sizeof(thread_name_old); - expect_d_eq(mallctl("thread.prof.name", - (void *)&thread_name_old, &sz, (void *)&thread_name, - sizeof(thread_name)), EPERM, - "Unexpected mallctl result writing \"%s\" to " - "thread.prof.name", thread_name); - } + const char *thread_name_old; + size_t sz = sizeof(thread_name_old); + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + (void *)&test_name1, sizeof(test_name1)), EPERM, + "Unexpected mallctl result from thread.prof.name"); mallctl_thread_name_set(""); } TEST_END -#define NTHREADS 4 -#define NRESET 25 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; @@ -82,6 +88,7 @@ thd_start(void *varg) { mallctl_thread_name_get(""); mallctl_thread_name_set(thread_name); +#define NRESET 25 for (i = 0; i < NRESET; i++) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); @@ -92,12 +99,14 @@ thd_start(void *varg) { mallctl_thread_name_set(""); return NULL; +#undef NRESET } TEST_BEGIN(test_prof_thread_name_threaded) { test_skip_if(!config_prof); test_skip_if(opt_prof_sys_thread_name); +#define NTHREADS 4 thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -109,10 +118,9 @@ TEST_BEGIN(test_prof_thread_name_threaded) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } +#undef NTHREADS } TEST_END -#undef NTHREADS -#undef NRESET int main(void) { From e62aa478c79865242363d3531fc58c4c7f65a1b4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 20:09:41 -0700 Subject: [PATCH 058/395] Rearrange the bools in prof_tdata_t to save some bytes. This lowered the sizeof(prof_tdata_t) from 200 to 192 which is a round size class. Afterwards the tdata_t size remain unchanged with the last commit, which effectively inlined the storage of thread names for free. 
--- include/jemalloc/internal/prof_structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index da3cf8d5..49061f02 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -156,9 +156,6 @@ struct prof_tdata_s { */ uint64_t thr_discrim; - bool attached; - bool expired; - rb_node(prof_tdata_t) tdata_link; /* @@ -198,6 +195,9 @@ struct prof_tdata_s { */ bool active; + bool attached; + bool expired; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; From 434a68e221f7dbb6f30bd13d318d0c22e1b47e78 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Mar 2023 19:02:24 -0700 Subject: [PATCH 059/395] Disallow decay during reentrancy. Decay should not be triggered during reentrant calls (may cause lock order reversal / deadlocks). Added a delay_trigger flag to the tickers to bypass decay when rentrancy_level is not zero. 
--- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/ticker.h | 33 +++++++---- test/unit/ticker.c | 61 +++++++++++++++++---- 3 files changed, 76 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c9d7db86..609e73d3 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -131,7 +131,8 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { */ ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); uint64_t *prng_state = tsd_prng_statep_get(tsd); - if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) { + if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, + tsd_reentrancy_level_get(tsd) > 0))) { arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 6b51ddec..de034995 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -57,23 +57,27 @@ ticker_read(const ticker_t *ticker) { JEMALLOC_NOINLINE #endif static bool -ticker_fixup(ticker_t *ticker) { +ticker_fixup(ticker_t *ticker, bool delay_trigger) { + if (delay_trigger) { + ticker->tick = 0; + return false; + } ticker->tick = ticker->nticks; return true; } static inline bool -ticker_ticks(ticker_t *ticker, int32_t nticks) { +ticker_ticks(ticker_t *ticker, int32_t nticks, bool delay_trigger) { ticker->tick -= nticks; if (unlikely(ticker->tick < 0)) { - return ticker_fixup(ticker); + return ticker_fixup(ticker, delay_trigger); } return false; } static inline bool -ticker_tick(ticker_t *ticker) { - return ticker_ticks(ticker, 1); +ticker_tick(ticker_t *ticker, bool delay_trigger) { + return ticker_ticks(ticker, 1, delay_trigger); } /* @@ -150,26 +154,35 @@ ticker_geom_read(const ticker_geom_t *ticker) { JEMALLOC_NOINLINE #endif static bool -ticker_geom_fixup(ticker_geom_t *ticker, uint64_t 
*prng_state) { +ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state, + bool delay_trigger) { + if (delay_trigger) { + ticker->tick = 0; + return false; + } + uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); ticker->tick = (uint32_t)( (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] / (uint64_t)TICKER_GEOM_MUL); + return true; } static inline bool -ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) { +ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks, + bool delay_trigger) { ticker->tick -= nticks; if (unlikely(ticker->tick < 0)) { - return ticker_geom_fixup(ticker, prng_state); + return ticker_geom_fixup(ticker, prng_state, delay_trigger); } return false; } static inline bool -ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) { - return ticker_geom_ticks(ticker, prng_state, 1); +ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state, + bool delay_trigger) { + return ticker_geom_ticks(ticker, prng_state, 1, delay_trigger); } #endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/test/unit/ticker.c b/test/unit/ticker.c index 0dd77861..c4147a0c 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -13,12 +13,12 @@ TEST_BEGIN(test_ticker_tick) { for (j = 0; j < NTICKS; j++) { expect_u_eq(ticker_read(&ticker), NTICKS - j, "Unexpected ticker value (i=%d, j=%d)", i, j); - expect_false(ticker_tick(&ticker), + expect_false(ticker_tick(&ticker, false), "Unexpected ticker fire (i=%d, j=%d)", i, j); } expect_u32_eq(ticker_read(&ticker), 0, "Expected ticker depletion"); - expect_true(ticker_tick(&ticker), + expect_true(ticker_tick(&ticker, false), "Expected ticker fire (i=%d)", i); expect_u32_eq(ticker_read(&ticker), NTICKS, "Expected ticker reset"); @@ -34,12 +34,15 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker 
fire"); + expect_false(ticker_ticks(&ticker, NTICKS, false), + "Unexpected ticker fire"); expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + expect_true(ticker_ticks(&ticker, NTICKS, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + expect_true(ticker_ticks(&ticker, NTICKS + 1, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -52,13 +55,14 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + expect_true(ticker_ticks(&tb, NTICKS + 1, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - ticker_tick(&ta); + ticker_tick(&ta, false); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + expect_true(ticker_ticks(&tb, NTICKS, false), "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -74,7 +78,7 @@ TEST_BEGIN(test_ticker_geom) { /* Just some random constant. */ uint64_t prng_state = 0x343219f93496db9fULL; for (uint64_t i = 0; i < niters; i++) { - while(!ticker_geom_tick(&ticker, &prng_state)) { + while(!ticker_geom_tick(&ticker, &prng_state, false)) { total_ticks++; } } @@ -90,11 +94,48 @@ TEST_BEGIN(test_ticker_geom) { } TEST_END +TEST_BEGIN(test_ticker_delay) { + const int32_t ticks = 1000; + const uint64_t niters = 10000; + + ticker_t t1; + ticker_init(&t1, ticks); + + ticker_geom_t t2; + /* Just some random constant. 
*/ + uint64_t prng_state = 0x43219f93496db9f3ULL; + ticker_geom_init(&t2, ticks); + + bool delay = false; + expect_false(ticker_ticks(&t1, ticks, delay), "Unexpected ticker fire"); + expect_false(ticker_geom_ticks(&t2, &prng_state, ticks, delay), + "Unexpected ticker fire"); + expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); + expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + + delay = true; + /* Not allowed to fire when delay is set to true. */ + for (unsigned i = 0; i < niters; i++) { + expect_false(ticker_tick(&t1, delay), "Unexpected ticker fire"); + expect_false(ticker_geom_tick(&t2, &prng_state, delay), + "Unexpected ticker fire"); + expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); + expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + } + + delay = false; + expect_true(ticker_tick(&t1, delay), "Expected ticker fire"); + expect_true(ticker_geom_tick(&t2, &prng_state, delay), + "Expected ticker fire"); +} +TEST_END + int main(void) { return test( test_ticker_tick, test_ticker_ticks, test_ticker_copy, - test_ticker_geom); + test_ticker_geom, + test_ticker_delay); } From 5f64ad60cdd2359249c863c2a01f8555672d7c35 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Jan 2023 15:43:43 -0800 Subject: [PATCH 060/395] Remove locked flag set in malloc_mutex_trylock As a hint flag of the lock, parameter locked should be set only when the lock is gained or freed. 
--- include/jemalloc/internal/mutex.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 63a0b1b3..03d3557b 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -175,7 +175,6 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { - atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); return true; } mutex_owner_stats_update(tsdn, mutex); From 521970fb2e5278b7b92061933cbacdbb9478998a Mon Sep 17 00:00:00 2001 From: Eric Mueller Date: Mon, 17 Apr 2023 18:59:25 -0700 Subject: [PATCH 061/395] Check for equality instead of assigning in asserts in hpa_from_pai. It appears like a simple typo means we're unconditionally overwriting some fields in hpa_from_pai when asserts are enabled. From hpa_shard_init, it looks like these fields have these values anyway, so this shouldn't cause bugs, but if something is wrong it seems better to have these asserts in place. See issue #2412. 
--- src/hpa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 8ebb2db2..1e736ad4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -703,10 +703,10 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, static hpa_shard_t * hpa_from_pai(pai_t *self) { - assert(self->alloc = &hpa_alloc); - assert(self->expand = &hpa_expand); - assert(self->shrink = &hpa_shrink); - assert(self->dalloc = &hpa_dalloc); + assert(self->alloc == &hpa_alloc); + assert(self->expand == &hpa_expand); + assert(self->shrink == &hpa_shrink); + assert(self->dalloc == &hpa_dalloc); return (hpa_shard_t *)self; } From fc680128e0aed18d878bdc71c1ceb53e79da3de7 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 21 Apr 2023 10:49:18 -0700 Subject: [PATCH 062/395] Remove errant `assert` in `arena_extent_alloc_large` This codepath may generate deferred work when the HPA is enabled. See also [@davidtgoldblatt's relevant comment on the PR which introduced this](https://github.com/jemalloc/jemalloc/pull/2107#discussion_r699770967) which prevented a similarly incorrect `assert` from being added elsewhere. 
--- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 970f60ed..9592ab9d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -340,7 +340,6 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); - assert(deferred_work_generated == false); if (edata != NULL) { if (config_stats) { From f2b28906e63bef7518c58236e3e9dde8e4fceb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Thu, 20 Apr 2023 22:38:28 +0000 Subject: [PATCH 063/395] Some nits in cache_bin.h --- include/jemalloc/internal/cache_bin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index ee8b1ae2..c9c8f865 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -454,9 +454,9 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return false; } - if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { - return true; - } + if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { + return true; + } bin->stack_head--; *bin->stack_head = ptr; @@ -642,7 +642,7 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); - bin->stack_head = bin->stack_head + nflushed; + bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); } From 6841110bd6ed17b32a5fed90c53c64555366a792 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 1 May 2023 11:49:35 -0700 Subject: [PATCH 064/395] Make `edata_cmp_summary_comp` 30% faster `edata_cmp_summary_comp` is one of the very hottest functions, taking up 3% of all time spent inside Jemalloc. 
I noticed that all existing callsites rely only on the sign of the value returned by this function, so I came up with this equivalent branchless implementation which preserves this property. After empirical measurement, I have found that this implementation is 30% faster, therefore representing a 1% speed-up to the allocator as a whole. At @interwq's suggestion, I've applied the same optimization to `edata_esnead_comp` in case this function becomes hotter in the future. --- include/jemalloc/internal/edata.h | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e77a55e6..d2d16c46 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -664,13 +664,20 @@ edata_cmp_summary_get(const edata_t *edata) { static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { - int ret; - ret = (a.sn > b.sn) - (a.sn < b.sn); - if (ret != 0) { - return ret; - } - ret = (a.addr > b.addr) - (a.addr < b.addr); - return ret; + /* + * Logically, what we're doing here is comparing based on `.sn`, and + * falling back to comparing on `.addr` in the case that `a.sn == b.sn`. + * We accomplish this by multiplying the result of the `.sn` comparison + * by 2, so that so long as it is not 0, it will dominate the `.addr` + * comparison in determining the sign of the returned result value. + * The justification for doing things this way is that this is + * branchless - all of the branches that would be present in a + * straightforward implementation are common cases, and thus the branch + * prediction accuracy is not great. As a result, this implementation + * is measurably faster (by around 30%). 
+ */ + return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + + ((a.addr > b.addr) - (a.addr < b.addr)); } static inline int @@ -683,15 +690,11 @@ edata_snad_comp(const edata_t *a, const edata_t *b) { static inline int edata_esnead_comp(const edata_t *a, const edata_t *b) { - int ret; - - ret = edata_esn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = edata_ead_comp(a, b); - return ret; + /* + * Similar to `edata_cmp_summary_comp`, we've opted for a + * branchless implementation for the sake of performance. + */ + return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b); } ph_proto(, edata_avail, edata_t) From 70344a2d38eb71a162ea19d1a4fee8f0d168588b Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 8 May 2023 12:37:18 -0700 Subject: [PATCH 065/395] Make eligible functions `static` The codebase is already very disciplined in making any function which can be `static`, but there are a few that appear to have slipped through the cracks. --- include/jemalloc/internal/extent.h | 2 -- src/decay.c | 2 +- src/extent.c | 4 +++- src/hpa.c | 2 +- test/unit/bit_util.c | 4 ++-- test/unit/double_free.c | 7 ++++--- test/unit/nstime.c | 2 +- test/unit/pa.c | 3 ++- test/unit/size_check.c | 4 ++-- 9 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 1d51d410..367793db 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -44,8 +44,6 @@ void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git a/src/decay.c 
b/src/decay.c index d801b2bc..dd107a34 100644 --- a/src/decay.c +++ b/src/decay.c @@ -14,7 +14,7 @@ static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { * Generate a new deadline that is uniformly random within the next epoch after * the current one. */ -void +static void decay_deadline_init(decay_t *decay) { nstime_copy(&decay->deadline, &decay->epoch); nstime_add(&decay->deadline, &decay->interval); diff --git a/src/extent.c b/src/extent.c index cf3d1f31..3374dd58 100644 --- a/src/extent.c +++ b/src/extent.c @@ -43,6 +43,8 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); /******************************************************************************/ @@ -1118,7 +1120,7 @@ extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, /* growing_retained */ false); } -bool +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), diff --git a/src/hpa.c b/src/hpa.c index 1e736ad4..7462025c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -83,7 +83,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { CACHELINE); } -hpdata_t * +static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, bool *oom) { /* Don't yet support big allocations; these should get filtered out. 
*/ diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 7d31b210..295abb1b 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -48,7 +48,7 @@ TEST_BEGIN(test_pow2_ceil_zu) { } TEST_END -void +static void expect_lg_ceil_range(size_t input, unsigned answer) { if (input == 1) { expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); @@ -60,7 +60,7 @@ expect_lg_ceil_range(size_t input, unsigned answer) { "Got %u as lg_ceil of %zu", answer, input); } -void +static void expect_lg_floor_range(size_t input, unsigned answer) { if (input == 1) { expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); diff --git a/test/unit/double_free.c b/test/unit/double_free.c index e73efe71..f1e50cd2 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -9,19 +9,20 @@ void fake_abort(const char *message) { fake_abort_called = true; } -void +static void test_double_free_pre(void) { safety_check_set_abort(&fake_abort); fake_abort_called = false; } -void +static void test_double_free_post() { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } -bool tcache_enabled() { +static bool +tcache_enabled() { bool enabled; size_t sz = sizeof(enabled); assert_d_eq( diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 56238ab3..e7e11e61 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -201,7 +201,7 @@ TEST_BEGIN(test_nstime_divide) { } TEST_END -void +static void test_nstime_since_once(nstime_t *t) { nstime_t old_t; nstime_copy(&old_t, t); diff --git a/test/unit/pa.c b/test/unit/pa.c index b1e2f6e9..d44bb95c 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -48,7 +48,8 @@ struct test_data_s { extent_hooks_t hooks; }; -test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { +static test_data_t * +init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { test_data_t *test_data = calloc(1, sizeof(test_data_t)); assert_ptr_not_null(test_data, ""); 
init_test_extent_hooks(&test_data->hooks); diff --git a/test/unit/size_check.c b/test/unit/size_check.c index accdc405..3cb3bc9c 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -14,7 +14,7 @@ void fake_abort(const char *message) { #define LARGE_SIZE1 SC_LARGE_MINCLASS #define LARGE_SIZE2 (LARGE_SIZE1 * 2) -void * +static void * test_invalid_size_pre(size_t sz) { safety_check_set_abort(&fake_abort); @@ -25,7 +25,7 @@ test_invalid_size_pre(size_t sz) { return ptr; } -void +static void test_invalid_size_post(void) { expect_true(fake_abort_called, "Safety check didn't fire"); safety_check_set_abort(NULL); From 12311fe6c37720225a3e8b5798e7051d153d29c1 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 9 May 2023 09:37:01 -0700 Subject: [PATCH 066/395] Fix segfault in `extent_try_coalesce_impl` Static analysis flagged this. `extent_record` was passing `NULL` as the value for `coalesced` to `extent_try_coalesce`, which in turn passes that argument to `extent_try_coalesce_impl`, where it is written to without checking if it is `NULL`. I can confirm from reviewing the fleetwide coredump data that this was in fact being hit in production. 
--- src/extent.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 3374dd58..fdcd0afb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -822,6 +822,7 @@ static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { assert(!edata_guarded_get(edata)); + assert(coalesced != NULL); /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -928,8 +929,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, goto label_skip_coalesce; } if (!ecache->delay_coalesce) { + bool coalesced_unused; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - NULL); + &coalesced_unused); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ From dc0a184f8d349546af6a051eb87be47715eacff3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 8 May 2023 18:18:39 -0700 Subject: [PATCH 067/395] Fix possible `NULL` pointer dereference in `VERIFY_READ` Static analysis flagged this. Fixed by simply checking `oldlenp` before dereferencing it. --- src/ctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index cfd4ac6e..61511d34 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1816,7 +1816,9 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { /* Verify that the space provided is enough. 
*/ #define VERIFY_READ(t) do { \ if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ - *oldlenp = 0; \ + if (oldlenp != NULL) { \ + *oldlenp = 0; \ + } \ ret = EINVAL; \ goto label_return; \ } \ From 019cccc293f96c9f7886373d816aab061f65f7de Mon Sep 17 00:00:00 2001 From: auxten Date: Wed, 3 May 2023 13:25:12 +0800 Subject: [PATCH 068/395] Make arenas_lookup_ctl triable --- src/ctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 61511d34..e7d6529e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3215,19 +3215,21 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, int ret; unsigned arena_ind; void *ptr; - edata_t *edata; + emap_full_alloc_ctx_t alloc_ctx; + bool ptr_not_present; arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - if (edata == NULL) { + ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); + if (ptr_not_present) { goto label_return; } - arena = arena_get_from_edata(edata); + arena = arena_get_from_edata(alloc_ctx.edata); if (arena == NULL) { goto label_return; } From 5bac384970a8224daee0b07475950a5291fc37d3 Mon Sep 17 00:00:00 2001 From: auxten Date: Wed, 3 May 2023 22:34:30 +0800 Subject: [PATCH 069/395] If ptr present check if alloc_ctx.edata == NULL --- src/ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index e7d6529e..c495ecf6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3225,7 +3225,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, WRITE(ptr, void *); ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); - if (ptr_not_present) { + if (ptr_not_present || alloc_ctx.edata == NULL) { goto label_return; } From 6ea8a7e928c86f7976c5e1356a22292509f8705b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 
16:20:14 -0700 Subject: [PATCH 070/395] Add config detection for JEMALLOC_HAVE_PTHREAD_SET_NAME_NP. and use it on the background thread name setting. --- configure.ac | 10 ++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/background_thread.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ec7a97cb..5b18fd34 100644 --- a/configure.ac +++ b/configure.ac @@ -1926,6 +1926,16 @@ dnl Check if we have dlsym support. if test "x${je_cv_pthread_getname_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ]) fi + dnl Check if pthread_set_name_np is available with the expected API. + JE_COMPILABLE([pthread_set_name_np(3)], [ +#include <pthread.h> +#include <pthread_np.h> +], [ + pthread_set_name_np(pthread_self(), "set_name_test"); +], [je_cv_pthread_set_name_np]) + if test "x${je_cv_pthread_set_name_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SET_NAME_NP], [ ], [ ]) + fi dnl Check if pthread_get_name_np is not necessarily present despite dnl the pthread_set_name_np counterpart JE_COMPILABLE([pthread_get_name_np(3)], [ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 87845a48..e61667a6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -89,6 +89,9 @@ /* Defined if pthread_getname_np(3) is available. */ #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP +/* Defined if pthread_set_name_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_SET_NAME_NP + /* Defined if pthread_get_name_np(3) is available. 
*/ #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP diff --git a/src/background_thread.c b/src/background_thread.c index 3c006cec..1d5bde6c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -467,7 +467,7 @@ background_thread_entry(void *ind_arg) { assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(__FreeBSD__) || defined(__DragonFly__) +#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { From 94ace05832209543bde81d0a5f0e2a9660243abd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 13:20:40 -0700 Subject: [PATCH 071/395] Fix the prof thread_name reference in prof_recent dump. As pointed out in #2434, the thread_name in prof_tdata_t was changed in #2407. This also requires an update for the prof_recent dump, specifically the emitter expects a "char **" which is fixed in this commit. 
--- src/prof_recent.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 4c3c6296..e5b3fb17 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -496,8 +496,9 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; assert(alloc_tdata != NULL); if (!prof_thread_name_empty(alloc_tdata)) { + const char *thread_name = alloc_tdata->thread_name; emitter_json_kv(emitter, "alloc_thread_name", - emitter_type_string, &alloc_tdata->thread_name); + emitter_type_string, &thread_name); } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, @@ -512,8 +513,9 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; assert(dalloc_tdata != NULL); if (!prof_thread_name_empty(dalloc_tdata)) { + const char *thread_name = dalloc_tdata->thread_name; emitter_json_kv(emitter, "dalloc_thread_name", - emitter_type_string, &dalloc_tdata->thread_name); + emitter_type_string, &thread_name); } assert(!nstime_equals_zero(&node->dalloc_time)); uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time); From d4a2b8bab10980d4677d43560f27ac9ef66cde45 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 16:32:51 -0700 Subject: [PATCH 072/395] Add the prof_sys_thread_name feature in the prof_recent unit test. This tests the combination of the prof_recent and thread_name features. Verified that it catches the issue being fixed in this PR. Also explicitly set thread name in test/unit/prof_recent. This fixes the name testing when no default thread name is set (e.g. FreeBSD). 
--- test/include/test/thd.h | 6 ++++-- test/src/thd.c | 18 ++++++++++++++++++ test/unit/prof_recent.c | 14 ++++++-------- test/unit/prof_recent.sh | 2 +- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/test/include/test/thd.h b/test/include/test/thd.h index 47a51262..848c5271 100644 --- a/test/include/test/thd.h +++ b/test/include/test/thd.h @@ -5,5 +5,7 @@ typedef HANDLE thd_t; typedef pthread_t thd_t; #endif -void thd_create(thd_t *thd, void *(*proc)(void *), void *arg); -void thd_join(thd_t thd, void **ret); +void thd_create(thd_t *thd, void *(*proc)(void *), void *arg); +void thd_join(thd_t thd, void **ret); +bool thd_has_setname(void); +void thd_setname(const char *name); diff --git a/test/src/thd.c b/test/src/thd.c index 9a15eabb..8f91a595 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -32,3 +32,21 @@ thd_join(thd_t thd, void **ret) { pthread_join(thd, ret); } #endif + +void +thd_setname(const char *name) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + pthread_setname_np(pthread_self(), name); +#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) + pthread_set_name_np(pthread_self(), name); +#endif +} + +bool +thd_has_setname(void) { +#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) + return true; +#else + return false; +#endif +} diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 4fb37236..2cf699d8 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -5,6 +5,8 @@ /* As specified in the shell script */ #define OPT_ALLOC_MAX 3 +const char *test_thread_name = "test_thread"; + /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup() { @@ -439,16 +441,11 @@ confirm_record(const char *template, const confirm_record_t *records, } ASSERT_CHAR(','); - if (opt_prof_sys_thread_name) { + if (thd_has_setname() && opt_prof_sys_thread_name) { ASSERT_FORMATTED_STR("\"%s_thread_name\"", *type); - ASSERT_CHAR(':'); - ASSERT_CHAR('"'); - 
while (*start != '"') { - ++start; - } - ASSERT_CHAR('"'); - ASSERT_CHAR(','); + ASSERT_FORMATTED_STR(":\"%s\",", + test_thread_name); } ASSERT_FORMATTED_STR("\"%s_time\"", *type); @@ -495,6 +492,7 @@ confirm_record(const char *template, const confirm_record_t *records, TEST_BEGIN(test_prof_recent_alloc_dump) { test_skip_if(!config_prof); + thd_setname(test_thread_name); confirm_prof_setup(); ssize_t future; diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh index 58a54a47..10415bf3 100644 --- a/test/unit/prof_recent.sh +++ b/test/unit/prof_recent.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3,prof_sys_thread_name:true" fi From 0288126d9cc0d061766e37cbbaabaa78aff3aff5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 11 May 2023 14:20:30 -0700 Subject: [PATCH 073/395] Fix possible `NULL` pointer dereference from `mallctl("prof.prefix", ...)` Static analysis flagged this issue. Here is a minimal program which causes a segfault within Jemalloc: ``` #include <jemalloc/jemalloc.h> const char *malloc_conf = "prof:true"; int main() { mallctl("prof.prefix", NULL, NULL, NULL, 0); } ``` Fixed by checking if `prefix` is `NULL`. 
--- src/prof_sys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prof_sys.c b/src/prof_sys.c index 3f7196f8..3cbb3a85 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -749,6 +749,9 @@ bool prof_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); + if (prefix == NULL) { + return true; + } malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); if (prof_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); From 3e2ba7a6510be583edb316372f8cfff35f2f25d5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 26 Apr 2023 14:10:41 -0700 Subject: [PATCH 074/395] Remove dead stores detected by static analysis None of these are harmful, and they are almost certainly optimized away by the compiler. The motivation for fixing them anyway is that we'd like to enable static analysis as part of CI, and the first step towards that is resolving the warnings it produces at present. --- include/jemalloc/internal/log.h | 3 +-- src/ctl.c | 2 +- src/jemalloc.c | 3 +-- src/malloc_io.c | 6 +----- src/pages.c | 2 +- src/stats.c | 4 ++-- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 64208586..f39c598a 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -96,8 +96,7 @@ log_impl_varargs(const char *name, ...) 
{ dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); dst_offset += malloc_vsnprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); - dst_offset += malloc_snprintf(buf + dst_offset, - JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + malloc_snprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); va_end(ap); malloc_write(buf); diff --git a/src/ctl.c b/src/ctl.c index c495ecf6..e597b2bb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2745,7 +2745,6 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; const char *dss = NULL; unsigned arena_ind; - dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -2773,6 +2772,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * Access via index narenas is deprecated, and scheduled for removal in * 6.0.0. */ + dss_prec_t dss_prec_old; if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == ctl_arenas->narenas) { if (dss_prec != dss_prec_limit && diff --git a/src/jemalloc.c b/src/jemalloc.c index 7407022f..37cd159c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2389,7 +2389,6 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * from the ind_large bucket. 
*/ szind_t ind_large; - size_t bumped_usize = usize; dopts->alignment = prof_sample_align(dopts->alignment); if (usize <= SC_SMALL_MAXCLASS) { @@ -2398,7 +2397,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) == SC_LARGE_MINCLASS); ind_large = sz_size2index(SC_LARGE_MINCLASS); - bumped_usize = sz_s2u(SC_LARGE_MINCLASS); + size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { diff --git a/src/malloc_io.c b/src/malloc_io.c index b76885cb..6de409b3 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -423,7 +423,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { unsigned char len = '?'; char *s; size_t slen; - bool first_width_digit = true; bool pad_zero = false; f++; @@ -462,9 +461,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } break; case '0': - if (first_width_digit) { - pad_zero = true; - } + pad_zero = true; JEMALLOC_FALLTHROUGH; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -474,7 +471,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; - first_width_digit = false; break; } default: break; diff --git a/src/pages.c b/src/pages.c index b672e4de..09b51b88 100644 --- a/src/pages.c +++ b/src/pages.c @@ -68,7 +68,6 @@ static int madvise_dont_need_zeros_is_faulty = -1; */ static int madvise_MADV_DONTNEED_zeroes_pages() { - int works = -1; size_t size = PAGE; void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, @@ -83,6 +82,7 @@ static int madvise_MADV_DONTNEED_zeroes_pages() } memset(addr, 'A', size); + int works; if (madvise(addr, size, MADV_DONTNEED) == 0) { works = memchr(addr, 'A', size) == NULL; } else { diff --git a/src/stats.c b/src/stats.c index 43360a2d..59db4f8e 
100644 --- a/src/stats.c +++ b/src/stats.c @@ -1803,7 +1803,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t sz; VARIABLE_ARRAY(bool, initialized, narenas); bool destroyed_initialized; - unsigned i, j, ninitialized; + unsigned i, ninitialized; xmallctlnametomib("arena.0.initialized", mib, &miblen); for (i = ninitialized = 0; i < narenas; i++) { @@ -1843,7 +1843,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Unmerged stats. */ if (unmerged) { - for (i = j = 0; i < narenas; i++) { + for (i = 0; i < narenas; i++) { if (initialized[i]) { char arena_ind_str[20]; malloc_snprintf(arena_ind_str, From 4e6f1e920814eafb4ca165a861e9c886022b35e3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 15 May 2023 10:39:15 -0700 Subject: [PATCH 075/395] Allow overriding `LG_PAGE` This is useful for our internal builds where we override the configuration in the header files generated by autoconf. --- .../internal/jemalloc_internal_defs.h.in | 2 ++ .../internal/jemalloc_internal_overrides.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 include/jemalloc/internal/jemalloc_internal_overrides.h diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e61667a6..20355949 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -452,4 +452,6 @@ */ #undef JEMALLOC_HAVE_RDTSCP +#include "jemalloc_internal_overrides.h" + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h new file mode 100644 index 00000000..ddd6ee17 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_OVERRIDES_H +#define JEMALLOC_INTERNAL_OVERRIDES_H + +/* + * Under normal circumstances this header serves no 
purpose, as these settings + * can be customized via the corresponding autoconf options at configure-time. + * Overriding in this fashion is useful when the header files generated by + * autoconf are used as input for another build system. + */ + +#ifdef JEMALLOC_OVERRIDE_LG_PAGE + #undef LG_PAGE + #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE +#endif + +#endif /* JEMALLOC_INTERNAL_OVERRIDES_H */ From 9c32689e576906332d2ceaabafc2a927d152beba Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 18 May 2023 10:13:59 -0700 Subject: [PATCH 076/395] Fix bug where hpa_shard was not being destroyed It appears that this was a simple mistake where `hpa_shard_disable` was being called instead of `hpa_shard_destroy`. At present `hpa_shard_destroy` is not called anywhere at all outside of test-cases, which further suggests that this is a bug. @davidtgoldblatt noted however that since HPA is disabled for manual arenas and we don't support destruction for auto arenas that presently there is no way to actually trigger this bug. Nonetheless, it should be fixed. --- src/pa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pa.c b/src/pa.c index eb7e4620..18c850d7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -108,7 +108,7 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { sec_flush(tsdn, &shard->hpa_sec); - hpa_shard_disable(tsdn, &shard->hpa_shard); + hpa_shard_destroy(tsdn, &shard->hpa_shard); } } From a2259f9fa6c9a82cacf1d85cf7d92a1a44484a97 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 May 2023 14:22:37 -0700 Subject: [PATCH 077/395] Fix the include path of "jemalloc_internal_overrides.h". 
--- include/jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 20355949..bef99dea 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -452,6 +452,6 @@ */ #undef JEMALLOC_HAVE_RDTSCP -#include "jemalloc_internal_overrides.h" +#include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ From d577e9b5880906dbd4ab04fb61de5650170ac08b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 17 May 2023 11:54:56 -0700 Subject: [PATCH 078/395] Explicitly cast to unsigned for MALLOCX_ARENA and _TCACHE defines. --- include/jemalloc/internal/jemalloc_internal_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index b23a8bed..b1c48be9 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -45,12 +45,12 @@ typedef enum malloc_init_e malloc_init_t; #define MALLOCX_ARENA_SHIFT 20 #define MALLOCX_TCACHE_SHIFT 8 #define MALLOCX_ARENA_MASK \ - (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) + ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) /* NB: Arena index bias decreases the maximum number of arenas by 1. 
*/ -#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) +#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) #define MALLOCX_TCACHE_MASK \ - (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) -#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) + ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) +#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) #define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ From d59e30cbc9fa47425a4ba907ab8f8b580e26f37e Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 24 May 2023 14:13:28 +0200 Subject: [PATCH 079/395] Rename fallback_impl to fallbackNewImpl and prune in jeprof The existing fallback_impl name seemed a bit generic and given it's static probably okay to rename. Closes #2451 --- bin/jeprof.in | 2 ++ src/jemalloc_cpp.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index dbf6252b..b734f50b 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2957,6 +2957,8 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@malloc', 'newImpl', 'void* newImpl', + 'fallbackNewImpl', + 'void* fallbackNewImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index e39615bc..4258b1ad 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -100,7 +100,7 @@ handleOOM(std::size_t size, bool nothrow) { template JEMALLOC_NOINLINE static void * -fallback_impl(std::size_t size) noexcept(IsNoExcept) { +fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { return ptr; @@ -112,7 +112,7 @@ template JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { - return imalloc_fastpath(size, 
&fallback_impl); + return imalloc_fastpath(size, &fallbackNewImpl); } void * From c1d3ad46746da038cfc66ea5b545d195f511b0f4 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 24 May 2023 21:07:49 +0200 Subject: [PATCH 080/395] Prune je_malloc_default and do_rallocx in jeprof Running a simple Ruby and Python execution shows je_malloc_default and do_rallocx() in the resulting SVG / text output. Prune these, too. MALLOC_CONF='stats_print:true,lg_prof_sample:8,prof:true,prof_final:true' \ python3 -c '[x for x in range(10000000)]' MALLOC_CONF='stats_print:true,lg_prof_sample:8,prof:true,prof_final:true' \ ruby -e 'puts (0..1000).map{"0"}.join(" ")' --- bin/jeprof.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index b734f50b..65f616d4 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2955,6 +2955,7 @@ sub RemoveUninterestingFrames { foreach my $name ('@JEMALLOC_PREFIX@calloc', 'cfree', '@JEMALLOC_PREFIX@malloc', + 'je_malloc_default', 'newImpl', 'void* newImpl', 'fallbackNewImpl', @@ -2968,6 +2969,7 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@realloc', '@JEMALLOC_PREFIX@mallocx', '@JEMALLOC_PREFIX@rallocx', + 'do_rallocx', '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', From 6d4aa33753d1d6fa60925b40e0fd40f1e6a42ef4 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Sat, 27 May 2023 14:21:11 -0700 Subject: [PATCH 081/395] Extract the calculation of psset heap assignment for an hpdata into a common function This is in preparation for upcoming changes I plan to make to this logic. Extracting it into a common function will make this easier and less error-prone, and cleans up the existing code regardless. 
--- include/jemalloc/internal/hpdata.h | 4 +-- src/psset.c | 47 ++++++++++++------------------ 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 1fb534db..5bf7aae8 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -343,12 +343,12 @@ hpdata_assert_consistent(hpdata_t *hpdata) { } static inline bool -hpdata_empty(hpdata_t *hpdata) { +hpdata_empty(const hpdata_t *hpdata) { return hpdata->h_nactive == 0; } static inline bool -hpdata_full(hpdata_t *hpdata) { +hpdata_full(const hpdata_t *hpdata) { return hpdata->h_nactive == HUGEPAGE_PAGES; } diff --git a/src/psset.c b/src/psset.c index 9a8f054f..55966816 100644 --- a/src/psset.c +++ b/src/psset.c @@ -92,8 +92,20 @@ psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, psset_bin_stats_insert_remove(psset, binstats, ps, false); } +static pszind_t +psset_hpdata_heap_index(const hpdata_t *ps) { + assert(!hpdata_full(ps)); + assert(!hpdata_empty(ps)); + size_t longest_free_range = hpdata_longest_free_range_get(ps); + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + return pind; +} + static void -psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { +psset_hpdata_heap_remove(psset_t *psset, hpdata_t *ps) { + pszind_t pind = psset_hpdata_heap_index(ps); hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); @@ -101,7 +113,8 @@ psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { } static void -psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { +psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) { + pszind_t pind = psset_hpdata_heap_index(ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { fb_set(psset->pageslab_bitmap, 
PSSET_NPSIZES, (size_t)pind); } @@ -115,12 +128,7 @@ psset_stats_insert(psset_t* psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - + pszind_t pind = psset_hpdata_heap_index(ps); psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], ps); } @@ -133,12 +141,7 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - + pszind_t pind = psset_hpdata_heap_index(ps); psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], ps); } @@ -165,13 +168,7 @@ psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { * going to return them from a psset_pick_alloc call. */ } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - - psset_hpdata_heap_insert(psset, pind, ps); + psset_hpdata_heap_insert(psset, ps); } } @@ -186,13 +183,7 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { /* Same as above -- do nothing in this case. 
*/ } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - - psset_hpdata_heap_remove(psset, pind, ps); + psset_hpdata_heap_remove(psset, ps); } } From 5832ef658975d5f2da2bdfddf55712d9fa343e30 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Wed, 31 May 2023 13:00:35 -0400 Subject: [PATCH 082/395] Use a local variable to set the alignment for this particular allocation instead of changing mmap_flags which makes the change permanent. This was enforcing large alignments for allocations that did not need it causing fragmentation. Reported by Andreas Gustafsson. --- src/pages.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pages.c b/src/pages.c index 09b51b88..2d5b8164 100644 --- a/src/pages.c +++ b/src/pages.c @@ -155,6 +155,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { * of existing mappings, and we only want to create new mappings. */ { + int flags = mmap_flags; #ifdef __NetBSD__ /* * On NetBSD PAGE for a platform is defined to the @@ -164,12 +165,12 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { */ if (alignment > os_page || PAGE > os_page) { unsigned int a = ilog2(MAX(alignment, PAGE)); - mmap_flags |= MAP_ALIGNED(a); + flags |= MAP_ALIGNED(a); } #endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0); + ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0); } assert(ret != NULL); From 86eb49b47847e48390c672371987ff4e476e53a3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 6 Jun 2023 13:37:07 -0700 Subject: [PATCH 083/395] Fix the arena selection for oversized allocations. Use the per-arena oversize_threshold, instead of the global setting. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 16 ++++++++++------ src/arena.c | 5 ++++- src/jemalloc.c | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9f5c1958..3821233f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -100,7 +100,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(void); +bool arena_init_huge(arena_t *a0); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 609e73d3..b57dbfdd 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -28,14 +28,18 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { * 1) is using auto arena selection (i.e. arena == NULL), and 2) the * thread is not assigned to a manual arena. 
*/ - if (unlikely(size >= oversize_threshold)) { - arena_t *tsd_arena = tsd_arena_get(tsd); - if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { - return arena_choose_huge(tsd); - } + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL) { + tsd_arena = arena_choose(tsd, NULL); } - return arena_choose(tsd, NULL); + size_t threshold = atomic_load_zu( + &tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); + if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + + return tsd_arena; } JEMALLOC_ALWAYS_INLINE void diff --git a/src/arena.c b/src/arena.c index 9592ab9d..ab1a9ab8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1770,7 +1770,7 @@ arena_choose_huge(tsd_t *tsd) { } bool -arena_init_huge(void) { +arena_init_huge(arena_t *a0) { bool huge_enabled; /* The threshold should be large size class. */ @@ -1783,6 +1783,9 @@ arena_init_huge(void) { /* Reserve the index for the huge arena. */ huge_arena_ind = narenas_total_get(); oversize_threshold = opt_oversize_threshold; + /* a0 init happened before malloc_conf_init. */ + atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, + oversize_threshold, ATOMIC_RELAXED); huge_enabled = true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 37cd159c..8a69d81b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2090,7 +2090,7 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); - if (arena_init_huge()) { + if (arena_init_huge(a0)) { narenas_total_inc(); } manual_arena_base = narenas_total_get(); From 90176f8a87a0b5bdb0ac4c1a515b1d9c58dc5a82 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 9 May 2023 12:06:47 -0700 Subject: [PATCH 084/395] Fix segfault in rb `*_tree_remove` Static analysis flagged this. It's possible to segfault in the `*_tree_remove` function generated by `rb_gen`, as `nodep` may still be `NULL` after the initial for loop. 
I can confirm from reviewing the fleetwide coredump data that this was in fact being hit in production, primarily through `tctx_tree_remove`, and much more rarely through `gctx_tree_remove`. --- include/jemalloc/internal/arena_inlines_b.h | 61 +++++++++++---------- include/jemalloc/internal/rb.h | 15 +++++ 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b57dbfdd..11b0ce46 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -42,6 +42,34 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { return tsd_arena; } +JEMALLOC_ALWAYS_INLINE bool +large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { + if (!config_opt_safety_checks) { + return false; + } + + /* + * Eagerly detect double free and sized dealloc bugs for large sizes. + * The cost is low enough (as edata will be accessed anyway) to be + * enabled all the time. + */ + if (unlikely(edata == NULL || + edata_state_get(edata) != extent_state_active)) { + safety_check_fail("Invalid deallocation detected: " + "pages being freed (%p) not currently active, " + "possibly caused by double free bugs.", ptr); + return true; + } + size_t input_size = sz_index2size(szind); + if (unlikely(input_size != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, + /* true_size */ edata_usize_get(edata), input_size); + return true; + } + + return false; +} + JEMALLOC_ALWAYS_INLINE void arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info, bool reset_recent) { @@ -65,6 +93,11 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. 
*/ assert(edata != NULL); + if (reset_recent && + large_dalloc_safety_checks(edata, ptr, + edata_szind_get(edata))) { + return; + } large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -215,34 +248,6 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return sz_index2size(full_alloc_ctx.szind); } -JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { - if (!config_opt_safety_checks) { - return false; - } - - /* - * Eagerly detect double free and sized dealloc bugs for large sizes. - * The cost is low enough (as edata will be accessed anyway) to be - * enabled all the time. - */ - if (unlikely(edata == NULL || - edata_state_get(edata) != extent_state_active)) { - safety_check_fail("Invalid deallocation detected: " - "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", ptr); - return true; - } - size_t input_size = sz_index2size(szind); - if (unlikely(input_size != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, - /* true_size */ edata_usize_get(edata), input_size); - return true; - } - - return false; -} - static inline void arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index a9a51cb6..fc1dac7c 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -560,6 +560,20 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ * the same as with the unfiltered version, with the added constraint that the * returned node must pass the filter. 
*/ +JEMALLOC_ALWAYS_INLINE void +rb_remove_safety_checks(const void *nodep, const char *function_name) { + if (!config_opt_safety_checks) { + return; + } + if (unlikely(nodep == NULL)) { + safety_check_fail( + ": Invalid deallocation detected in %s: " + "attempting to remove node from tree but node was " + "not found. Possibly caused by double free bugs.", + function_name); + } +} + #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ rb_empty_summarize, false) @@ -852,6 +866,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } \ } \ + rb_remove_safety_checks(nodep, __func__); \ assert(nodep->node == node); \ pathp--; \ if (pathp->node != node) { \ From 210f0d0b2bb3ed51a83a675c34f09fc36ac686e1 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 15 Jun 2023 14:47:20 -0700 Subject: [PATCH 085/395] Fix read of uninitialized data in `prof_free` In #2433, I inadvertently introduced a regression which causes the use of uninitialized data. Namely, the control path I added for the safety check in `arena_prof_info_get` neglected to set `prof_info->alloc_tctx` when the check fails, resulting in `prof_info.alloc_tctx` being uninitialized [when it is read at the end of `prof_free`](https://github.com/jemalloc/jemalloc/blob/90176f8a87a0b5bdb0ac4c1a515b1d9c58dc5a82/include/jemalloc/internal/prof_inlines.h#L272). 
--- include/jemalloc/internal/arena_inlines_b.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 11b0ce46..bf25a31c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -96,6 +96,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (reset_recent && large_dalloc_safety_checks(edata, ptr, edata_szind_get(edata))) { + prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; return; } large_prof_info_get(tsd, edata, prof_info, reset_recent); From bb0333e745a71aea0230a09be49a752115d45bb7 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 12 May 2023 13:17:52 -0700 Subject: [PATCH 086/395] Fix remaining static analysis warnings Fix or suppress the remaining warnings generated by static analysis. This is a necessary step before we can incorporate static analysis into CI. Where possible, I've preferred to modify the code itself instead of just disabling the warning with a magic comment, so that if we decide to use different static analysis tools in the future we will be covered against them raising similar warnings. 
--- .../internal/jemalloc_internal_inlines_c.h | 2 ++ include/jemalloc/internal/ph.h | 6 ++++ include/jemalloc/internal/rb.h | 1 + include/jemalloc/internal/rtree.h | 5 +++- include/jemalloc/internal/witness.h | 3 ++ src/background_thread.c | 28 +++++++++++++------ src/ctl.c | 17 ++++++++--- src/decay.c | 1 + src/extent.c | 1 + src/jemalloc.c | 3 +- src/jemalloc_cpp.cpp | 2 ++ src/malloc_io.c | 3 +- 12 files changed, 56 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 719b8eea..206f1400 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -325,6 +325,8 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); cache_bin_t *bin = &tcache->bins[ind]; + /* Suppress spurious warning from static analysis */ + assert(bin != NULL); bool tcache_success; void *ret; diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index c3cf8743..1fabee5d 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -127,6 +127,7 @@ phn_merge_ordered(void *phn0, void *phn1, size_t offset, phn0child = phn_lchild_get(phn0, offset); phn_next_set(phn1, phn0child, offset); if (phn0child != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_prev_set(phn0child, phn1, offset); } phn_lchild_set(phn0, phn1, offset); @@ -143,6 +144,7 @@ phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { phn_merge_ordered(phn0, phn1, offset, cmp); result = phn0; } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_merge_ordered(phn1, phn0, offset, cmp); result = phn1; } @@ -188,10 +190,12 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { phn_prev_set(phn1, NULL, offset); phn_next_set(phn1, NULL, offset); phn0 = phn_merge(phn0, 
phn1, offset, cmp); + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = phnrest; } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = NULL; @@ -210,6 +214,7 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { if (head == NULL) { break; } + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = head; @@ -298,6 +303,7 @@ ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) { phn0 = phn_merge(phn0, phn1, offset, cmp); phn_next_set(phn0, next_phn1, offset); if (next_phn1 != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_prev_set(next_phn1, phn0, offset); } phn_next_set(ph->root, phn0, offset); diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index fc1dac7c..343e7c13 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -867,6 +867,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } \ rb_remove_safety_checks(nodep, __func__); \ + assert(nodep != NULL); \ assert(nodep->node == node); \ pathp--; \ if (pathp->node != node) { \ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index a00adb29..22f5f9dc 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -268,6 +268,10 @@ rtree_contents_encode(rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT *bits = (void *)rtree_leaf_elm_bits_encode(contents); + /* Suppress spurious warning from static analysis */ + if (config_debug) { + *additional = 0; + } #else *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) @@ -299,7 +303,6 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); void *bits; unsigned additional; - 
rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index e81b9a00..fbe5f943 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -341,6 +341,9 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { witness_lock_error(witnesses, witness); } + /* Suppress spurious warning from static analysis */ + assert(ql_empty(witnesses) || + qr_prev(ql_first(witnesses), link) != NULL); ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); } diff --git a/src/background_thread.c b/src/background_thread.c index 1d5bde6c..53b492bb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -340,8 +340,9 @@ background_thread_create_signals_masked(pthread_t *thread, } static bool -check_background_thread_creation(tsd_t *tsd, unsigned *n_created, - bool *created_threads) { +check_background_thread_creation(tsd_t *tsd, + const size_t const_max_background_threads, + unsigned *n_created, bool *created_threads) { bool ret = false; if (likely(*n_created == n_background_threads)) { return ret; @@ -349,7 +350,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); - for (unsigned i = 1; i < max_background_threads; i++) { + for (unsigned i = 1; i < const_max_background_threads; i++) { if (created_threads[i]) { continue; } @@ -391,10 +392,19 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, static void background_thread0_work(tsd_t *tsd) { - /* Thread0 is also responsible for launching / terminating threads. */ - VARIABLE_ARRAY(bool, created_threads, max_background_threads); + /* + * Thread0 is also responsible for launching / terminating threads. + * We are guaranteed that `max_background_threads` will not change + * underneath us. 
Unfortunately static analysis tools do not understand + * this, so we are extracting `max_background_threads` into a local + * variable solely for the sake of exposing this information to such + * tools. + */ + const size_t const_max_background_threads = max_background_threads; + assert(const_max_background_threads > 0); + VARIABLE_ARRAY(bool, created_threads, const_max_background_threads); unsigned i; - for (i = 1; i < max_background_threads; i++) { + for (i = 1; i < const_max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. */ @@ -404,8 +414,8 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - if (check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads)) { + if (check_background_thread_creation(tsd, const_max_background_threads, + &n_created, (bool *)&created_threads)) { continue; } background_work_sleep_once(tsd_tsdn(tsd), @@ -417,7 +427,7 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. */ assert(!background_thread_enabled()); - for (i = 1; i < max_background_threads; i++) { + for (i = 1; i < const_max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { diff --git a/src/ctl.c b/src/ctl.c index e597b2bb..7d0ab346 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1314,9 +1314,18 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { static void ctl_refresh(tsdn_t *tsdn) { - unsigned i; + malloc_mutex_assert_owner(tsdn, &ctl_mtx); + /* + * We are guaranteed that `ctl_arenas->narenas` will not change + * underneath us since we hold `ctl_mtx` for the duration of this + * function. Unfortunately static analysis tools do not understand this, + * so we are extracting `narenas` into a local variable solely for the + * sake of exposing this information to such tools. 
+ */ + const unsigned narenas = ctl_arenas->narenas; + assert(narenas > 0); ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas); + VARIABLE_ARRAY(arena_t *, tarenas, narenas); /* * Clear sum stats, since they will be merged into by @@ -1324,11 +1333,11 @@ ctl_refresh(tsdn_t *tsdn) { */ ctl_arena_clear(ctl_sarena); - for (i = 0; i < ctl_arenas->narenas; i++) { + for (unsigned i = 0; i < narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); } - for (i = 0; i < ctl_arenas->narenas; i++) { + for (unsigned i = 0; i < narenas; i++) { ctl_arena_t *ctl_arena = arenas_i(i); bool initialized = (tarenas[i] != NULL); diff --git a/src/decay.c b/src/decay.c index dd107a34..f75696dd 100644 --- a/src/decay.c +++ b/src/decay.c @@ -157,6 +157,7 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) { uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + assert(decay_interval_ns != 0); size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns); uint64_t npages_purge; diff --git a/src/extent.c b/src/extent.c index fdcd0afb..18e4698c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -407,6 +407,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); if (edata != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { emap_release_edata(tsdn, pac->emap, edata, diff --git a/src/jemalloc.c b/src/jemalloc.c index 8a69d81b..88559be0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -991,7 +991,8 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { * Each source should only be read once, to minimize # of * syscalls on init. 
*/ - assert(read_source++ == which_source); + assert(read_source == which_source); + read_source++; } assert(which_source < MALLOC_CONF_NSOURCES); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 4258b1ad..44569c14 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -1,5 +1,6 @@ #include #include +// NOLINTBEGIN(misc-use-anonymous-namespace) #define JEMALLOC_CPP_CPP_ #ifdef __cplusplus @@ -258,3 +259,4 @@ operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexc } #endif // __cpp_aligned_new +// NOLINTEND(misc-use-anonymous-namespace) diff --git a/src/malloc_io.c b/src/malloc_io.c index 6de409b3..192d8208 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -316,7 +316,8 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { if (alt_form) { s -= 2; (*slen_p) += 2; - memcpy(s, uppercase ? "0X" : "0x", 2); + s[0] = '0'; + s[1] = uppercase ? 'X' : 'x'; } return s; } From 05385191d4ba42eb219141503a42c648722a8d4f Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 17 May 2023 17:00:10 -0700 Subject: [PATCH 087/395] Add GitHub action which runs static analysis Now that all of the various issues that static analysis uncovered have been fixed (#2431, #2432, #2433, #2436, #2437, #2446), I've added a GitHub action which will run static analysis for every PR going forward. When static analysis detects issues with your code, the GitHub action provides a link to download its findings in a form tailored for human consumption. Take a look at [this demonstration of what it looks like when static analysis issues are found](https://github.com/Svetlitski/jemalloc/actions/runs/5010245602) on my fork for an example (make sure to follow the instructions in the error message to download and inspect the results). 
--- .github/workflows/static_analysis.yaml | 68 ++++++++++++++++++++++++++ .gitignore | 4 ++ scripts/run_static_analysis.sh | 52 ++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 .github/workflows/static_analysis.yaml create mode 100755 scripts/run_static_analysis.sh diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml new file mode 100644 index 00000000..547b1564 --- /dev/null +++ b/.github/workflows/static_analysis.yaml @@ -0,0 +1,68 @@ +name: 'Static Analysis' +on: [pull_request] +jobs: + static-analysis: + runs-on: ubuntu-latest + steps: + # We build libunwind ourselves because sadly the version + # provided by Ubuntu via apt-get is much too old. + - name: Check out libunwind + uses: actions/checkout@v3 + with: + repository: libunwind/libunwind + path: libunwind + ref: 'v1.6.2' + github-server-url: 'https://github.com' + - name: Install libunwind + run: | + cd libunwind + autoreconf -i + ./configure --prefix=/usr + make -s -j $(nproc) V=0 + sudo make -s install V=0 + cd .. + rm -rf libunwind + - name: Check out repository + uses: actions/checkout@v3 + # We download LLVM directly from the latest stable release + # on GitHub, because this tends to be much newer than the + # version available via apt-get in Ubuntu. 
+ - name: Download LLVM + uses: dsaltares/fetch-gh-release-asset@master + with: + repo: 'llvm/llvm-project' + version: 'latest' + file: 'clang[+]llvm-.*x86_64-linux-gnu.*' + regex: true + target: 'llvm_assets/' + token: ${{ secrets.GITHUB_TOKEN }} + - name: Install prerequisites + id: install_prerequisites + run: | + tar -C llvm_assets -xaf llvm_assets/*.tar* & + sudo apt-get update + sudo apt-get install -y jq bear python3-pip + pip install codechecker + echo "Extracting LLVM from tar" 1>&2 + wait + echo "LLVM_BIN_DIR=$(echo llvm_assets/clang*/bin)" >> "$GITHUB_OUTPUT" + - name: Run static analysis + id: run_static_analysis + run: > + PATH="${{ steps.install_prerequisites.outputs.LLVM_BIN_DIR }}:$PATH" + LDFLAGS='-L/usr/lib' + scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT" + - name: Upload static analysis results + if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS == '1' }} + uses: actions/upload-artifact@v3 + with: + name: static_analysis_results + path: static_analysis_results + - name: Check static analysis results + run: | + if [[ "${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }}" == '1' ]] + then + echo "::error::Static analysis found issues with your code. Download the 'static_analysis_results' artifact from this workflow and view the 'index.html' file contained within it in a web browser locally for detailed results." 
+ exit 1 + fi + diff --git a/.gitignore b/.gitignore index 1c0b3385..0f5e7aae 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,10 @@ /src/*.[od] /src/*.sym +compile_commands.json +/static_analysis_raw_results +/static_analysis_results + /run_tests.out/ /test/test.sh diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh new file mode 100755 index 00000000..db870689 --- /dev/null +++ b/scripts/run_static_analysis.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +git clean -Xfd + +export CC='clang' +export CXX='clang++' +compile_time_malloc_conf='background_thread:true,'\ +'metadata_thp:auto,'\ +'abort_conf:true,'\ +'muzzy_decay_ms:0,'\ +'zero_realloc:free,'\ +'prof_unbias:false,'\ +'prof_time_resolution:high' + +./autogen.sh \ + --with-private-namespace=jemalloc_ \ + --disable-cache-oblivious \ + --enable-prof \ + --enable-prof-libunwind \ + --with-malloc-conf="$compile_time_malloc_conf" \ + --enable-readlinkat \ + --enable-opt-safety-checks \ + --enable-uaf-detection \ + --enable-force-getenv \ + --enable-debug # Enabling debug for static analysis is important, + # otherwise you'll get tons of warnings for things + # that are already covered by `assert`s. + +bear -- make -s -j $(nproc) +# We end up with lots of duplicate entries in the compilation database, one for +# each output file type (e.g. .o, .d, .sym, etc.). There must be exactly one +# entry for each file in the compilation database in order for +# cross-translation-unit analysis to work, so we deduplicate the database here. 
+jq '[.[] | select(.output | test("/[^./]*\\.o$"))]' compile_commands.json > compile_commands.json.tmp +mv compile_commands.json.tmp compile_commands.json + +CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs $(nproc) \ + --ctu --compile-uniqueing strict --output static_analysis_raw_results \ + --analyzers clang-tidy clangsa + +html_output_dir="${1:-static_analysis_results}" +result=${2:-/dev/null} +# We're echoing a value because we want to indicate whether or not any errors +# were found, but we always want the script to have a successful exit code so +# that we actually reach the step in the GitHub action where we upload the results. +if CodeChecker parse --export html --output "$html_output_dir" static_analysis_raw_results +then + echo "HAS_STATIC_ANALYSIS_RESULTS=0" >> "$result" +else + echo "HAS_STATIC_ANALYSIS_RESULTS=1" >> "$result" +fi From f2e00d2fd3e56e6599f889ee09d5c41ed4012015 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 28 Apr 2023 12:29:49 -0700 Subject: [PATCH 088/395] Remove trailing whitespace Additionally, added a GitHub Action to ensure no more trailing whitespace will creep in again in the future. I'm excluding Markdown files from this check, since trailing whitespace is significant there, and also excluding `build-aux/install-sh` because there is significant trailing whitespace on the line that sets `defaultIFS`. 
--- .github/workflows/check_formatting.yaml | 10 ++++++++++ bin/jeprof.in | 12 ++++++------ build-aux/install-sh | 14 +++++++------- configure.ac | 2 +- .../internal/jemalloc_internal_defs.h.in | 6 +++--- include/msvc_compat/C99/stdint.h | 18 +++++++++--------- scripts/check_trailing_whitespace.sh | 7 +++++++ test/include/test/SFMT-params.h | 18 +++++++++--------- test/include/test/SFMT.h | 14 +++++++------- test/include/test/arena_util.h | 2 +- test/src/timer.c | 8 ++++---- test/stress/cpp/microbench.cpp | 12 ++++++------ 12 files changed, 70 insertions(+), 53 deletions(-) create mode 100644 .github/workflows/check_formatting.yaml create mode 100755 scripts/check_trailing_whitespace.sh diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml new file mode 100644 index 00000000..f7be77b1 --- /dev/null +++ b/.github/workflows/check_formatting.yaml @@ -0,0 +1,10 @@ +name: 'Check Formatting' +on: [pull_request] +jobs: + check-formatting: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Check for trailing whitespace + run: scripts/check_trailing_whitespace.sh diff --git a/bin/jeprof.in b/bin/jeprof.in index 65f616d4..f02c1f3e 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -4504,19 +4504,19 @@ sub FindLibrary { # For libc libraries, the copy in /usr/lib/debug contains debugging symbols sub DebuggingLibrary { my $file = shift; - + if ($file !~ m|^/|) { return undef; } - + # Find debug symbol file if it's named after the library's name. 
- - if (-f "/usr/lib/debug$file") { + + if (-f "/usr/lib/debug$file") { if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } return "/usr/lib/debug$file"; } elsif (-f "/usr/lib/debug$file.debug") { if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } - return "/usr/lib/debug$file.debug"; + return "/usr/lib/debug$file.debug"; } if(!$main::opt_debug_syms_by_id) { @@ -4525,7 +4525,7 @@ sub DebuggingLibrary { } # Find debug file if it's named after the library's build ID. - + my $readelf = ''; if (!$main::gave_up_on_elfutils) { $readelf = qx/eu-readelf -n ${file}/; diff --git a/build-aux/install-sh b/build-aux/install-sh index ebc66913..b44de098 100755 --- a/build-aux/install-sh +++ b/build-aux/install-sh @@ -115,7 +115,7 @@ fi if [ x"$dir_arg" != x ]; then dst=$src src="" - + if [ -d $dst ]; then instcmd=: else @@ -124,7 +124,7 @@ if [ x"$dir_arg" != x ]; then else # Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad +# might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if [ -f $src -o -d $src ] @@ -134,7 +134,7 @@ else echo "install: $src does not exist" exit 1 fi - + if [ x"$dst" = x ] then echo "install: no destination specified" @@ -201,17 +201,17 @@ else # If we're going to rename the final executable, determine the name now. - if [ x"$transformarg" = x ] + if [ x"$transformarg" = x ] then dstfile=`basename $dst` else - dstfile=`basename $dst $transformbasename | + dstfile=`basename $dst $transformbasename | sed $transformarg`$transformbasename fi # don't allow the sed command to completely eliminate the filename - if [ x"$dstfile" = x ] + if [ x"$dstfile" = x ] then dstfile=`basename $dst` else @@ -242,7 +242,7 @@ else # Now rename the file to the real destination. 
$doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile + $doit $mvcmd $dsttmp $dstdir/$dstfile fi && diff --git a/configure.ac b/configure.ac index 5b18fd34..687b221c 100644 --- a/configure.ac +++ b/configure.ac @@ -2501,7 +2501,7 @@ AC_SUBST([enable_initial_exec_tls]) if test "x${je_cv_tls_model}" = "xyes" -a \ "x${enable_initial_exec_tls}" = "x1" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))], + [__attribute__((tls_model("initial-exec")))], [ ]) else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index bef99dea..cce638d3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -446,9 +446,9 @@ /* If defined, use volatile asm during benchmarks. */ #undef JEMALLOC_HAVE_ASM_VOLATILE -/* - * If defined, support the use of rdtscp to get the time stamp counter - * and the processor ID. +/* + * If defined, support the use of rdtscp to get the time stamp counter + * and the processor ID. */ #undef JEMALLOC_HAVE_RDTSCP diff --git a/include/msvc_compat/C99/stdint.h b/include/msvc_compat/C99/stdint.h index d02608a5..c66fbb81 100644 --- a/include/msvc_compat/C99/stdint.h +++ b/include/msvc_compat/C99/stdint.h @@ -1,32 +1,32 @@ // ISO C9x compliant stdint.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// // Copyright (c) 2006-2008 Alexander Chemeris -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: -// +// // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. -// +// // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. -// +// // 3. The name of the author may be used to endorse or promote products // derived from this software without specific prior written permission. -// +// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// /////////////////////////////////////////////////////////////////////////////// #ifndef _MSC_VER // [ diff --git a/scripts/check_trailing_whitespace.sh b/scripts/check_trailing_whitespace.sh new file mode 100755 index 00000000..7aafe131 --- /dev/null +++ b/scripts/check_trailing_whitespace.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +if git grep -E '\s+$' -- ':!*.md' ':!build-aux/install-sh' +then + echo 'Error: found trailing whitespace' 1>&2 + exit 1 +fi diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h index ade66222..6730adf8 100644 --- a/test/include/test/SFMT-params.h +++ b/test/include/test/SFMT-params.h @@ -45,7 +45,7 @@ /*----------------- BASIC DEFINITIONS -----------------*/ -/** Mersenne Exponent. The period of the sequence +/** Mersenne Exponent. 
The period of the sequence * is a multiple of 2^MEXP-1. * #define MEXP 19937 */ /** SFMT generator has an internal state array of 128-bit integers, @@ -63,25 +63,25 @@ following definitions are in paramsXXXX.h file. ----------------------*/ /** the pick up position of the array. -#define POS1 122 +#define POS1 122 */ /** the parameter of shift left as four 32-bit registers. #define SL1 18 */ -/** the parameter of shift left as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SL2 1 +/** the parameter of shift left as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SL2 1 */ /** the parameter of shift right as four 32-bit registers. #define SR1 11 */ -/** the parameter of shift right as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SR2 1 +/** the parameter of shift right as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SR2 1 */ /** A bitmask, used in the recursion. These parameters are introduced @@ -89,7 +89,7 @@ #define MSK1 0xdfffffefU #define MSK2 0xddfecb7fU #define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U +#define MSK4 0xbffffff6U */ /** These definitions are part of a 128-bit period certification vector. diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 863fc55e..338dd45c 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -33,8 +33,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** - * @file SFMT.h +/** + * @file SFMT.h * * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom * number generator @@ -53,7 +53,7 @@ * and you have to define PRIu64 and PRIx64 in this file as follows: * @verbatim typedef unsigned int uint32_t - typedef unsigned long long uint64_t + typedef unsigned long long uint64_t #define PRIu64 "llu" #define PRIx64 "llx" @endverbatim @@ -84,8 +84,8 @@ int get_min_array_size64(void); /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ static inline double to_real1(uint32_t v) { - return v * (1.0/4294967295.0); - /* divided by 2^32-1 */ + return v * (1.0/4294967295.0); + /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ @@ -95,7 +95,7 @@ static inline double genrand_real1(sfmt_t *ctx) { /** generates a random number on [0,1)-real-interval */ static inline double to_real2(uint32_t v) { - return v * (1.0/4294967296.0); + return v * (1.0/4294967296.0); /* divided by 2^32 */ } @@ -106,7 +106,7 @@ static inline double genrand_real2(sfmt_t *ctx) { /** generates a random number on (0,1)-real-interval */ static inline double to_real3(uint32_t v) { - return (((double)v) + 0.5)*(1.0/4294967296.0); + return (((double)v) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 9a41dacb..535c1aa1 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -26,7 +26,7 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { static inline void do_arena_destroy(unsigned arena_ind) { - /* + /* * For convenience, flush tcache in case there are cached items. * However not assert success since the tcache may be disabled. 
*/ diff --git a/test/src/timer.c b/test/src/timer.c index 0f39d5f6..94528a34 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -28,12 +28,12 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { size_t i = 0; size_t j, n; - /* - * The time difference could be 0 if the two clock readings are + /* + * The time difference could be 0 if the two clock readings are * identical, either due to the operations being measured in the middle - * took very little time (or even got optimized away), or the clock + * took very little time (or even got optimized away), or the clock * readings are bad / very coarse grained clock. - * Thus, bump t1 if it is 0 to avoid dividing 0. + * Thus, bump t1 if it is 0 to avoid dividing 0. */ if (t1 == 0) { t1 = 1; diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 203c3dc9..7422d1ca 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -52,15 +52,15 @@ malloc_sdallocx(void) { #endif TEST_BEGIN(test_free_vs_delete) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free", (void *)malloc_free, + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free", (void *)malloc_free, "new_delete", (void *)new_delete); } TEST_END TEST_BEGIN(test_free_array_vs_delete_array) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free_array", (void *)malloc_free_array, + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free_array", (void *)malloc_free_array, "delete_array", (void *)new_delete_array); } TEST_END @@ -68,8 +68,8 @@ TEST_END TEST_BEGIN(test_sized_delete_vs_sdallocx) { #if __cpp_sized_deallocation >= 201309 - compare_funcs(10*1000*1000, 100*1000*1000, - "new_size_delete", (void *)new_sized_delete, + compare_funcs(10*1000*1000, 100*1000*1000, + "new_size_delete", (void *)new_sized_delete, "malloc_sdallocx", (void *)malloc_sdallocx); #else malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ From 46e464a26bcf83c414db489c23236663ee570260 Mon 
Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 23 Jun 2023 13:30:16 -0700 Subject: [PATCH 089/395] Fix downloading LLVM in GitHub Action It turns out LLVM does not include a build for every platform in the assets for every release, just some of them. As such, I've pinned us to the latest release version with a corresponding build. --- .github/workflows/static_analysis.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml index 547b1564..df60b5a1 100644 --- a/.github/workflows/static_analysis.yaml +++ b/.github/workflows/static_analysis.yaml @@ -31,7 +31,7 @@ jobs: uses: dsaltares/fetch-gh-release-asset@master with: repo: 'llvm/llvm-project' - version: 'latest' + version: 'tags/llvmorg-16.0.4' file: 'clang[+]llvm-.*x86_64-linux-gnu.*' regex: true target: 'llvm_assets/' From d1313313101f9df127bba08bf8fd90a849bf3b87 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 23 Jun 2023 14:13:26 -0700 Subject: [PATCH 090/395] Avoid eager purging on the dedicated oversize arena when using bg thds. We have observed new workload patterns (namely ML training type) that cycle through oversized allocations frequently, because 1) the dataset might be sparse which is faster to go through, and 2) GPU accelerated. As a result, the eager purging from the oversize arena becomes a bottleneck. To offer an easy solution, allow normal purging of the oversized extents when background threads are enabled. 
--- src/arena.c | 61 +++++++++++++++++++++------------- src/extent.c | 1 + test/unit/oversize_threshold.c | 5 ++- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/arena.c b/src/arena.c index ab1a9ab8..3b151b77 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1730,6 +1730,42 @@ label_error: return NULL; } +static arena_t * +arena_create_huge_arena(tsd_t *tsd, unsigned ind) { + assert(ind != 0); + + arena_t *huge_arena = arena_get(tsd_tsdn(tsd), ind, true); + if (huge_arena == NULL) { + return NULL; + } + + char *huge_arena_name = "auto_oversize"; + strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); + huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; + + /* + * Purge eagerly for huge allocations, because: 1) number of huge + * allocations is usually small, which means ticker based decay is not + * reliable; and 2) less immediate reuse is expected for huge + * allocations. + * + * However, with background threads enabled, keep normal purging since + * the purging delay is bounded. + */ + if (!background_thread_enabled() + && arena_dirty_decay_ms_default_get() > 0) { + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_dirty, 0); + } + if (!background_thread_enabled() + &&arena_muzzy_decay_ms_default_get() > 0) { + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_muzzy, 0); + } + + return huge_arena; +} + arena_t * arena_choose_huge(tsd_t *tsd) { /* huge_arena_ind can be 0 during init (will use a0). */ @@ -1740,30 +1776,7 @@ arena_choose_huge(tsd_t *tsd) { arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false); if (huge_arena == NULL) { /* Create the huge arena on demand. 
*/ - assert(huge_arena_ind != 0); - huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true); - if (huge_arena == NULL) { - return NULL; - } - - char *huge_arena_name = "auto_oversize"; - strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); - huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; - - /* - * Purge eagerly for huge allocations, because: 1) number of - * huge allocations is usually small, which means ticker based - * decay is not reliable; and 2) less immediate reuse is - * expected for huge allocations. - */ - if (arena_dirty_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_dirty, 0); - } - if (arena_muzzy_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_muzzy, 0); - } + huge_arena = arena_create_huge_arena(tsd, huge_arena_ind); } return huge_arena; diff --git a/src/extent.c b/src/extent.c index 18e4698c..477050b6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -944,6 +944,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + && !background_thread_enabled() && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &ecache->mtx); diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index 44a8f76a..95ce6537 100644 --- a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -120,7 +120,10 @@ TEST_BEGIN(test_oversize_threshold) { */ ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); - expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); + if (!is_background_thread_enabled()) { + expect_zu_ge(max_purged, 2 * 1024 * 1024, + "Expected a 2MB purge"); + } } TEST_END From e1338703efb77f7d276ee65121fa63bb66ede239 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 3 Jul 2023 14:45:28 -0700 Subject: [PATCH 091/395] Address compiler warnings in the unit tests --- test/unit/hpa.c | 2 ++ test/unit/retained.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index dfd57f39..f7874281 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -32,6 +32,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { false, /* hugify_delay_ms */ 10 * 1000, + /* min_purge_interval_ms */ + 5 * 1000, }; static hpa_shard_t * diff --git a/test/unit/retained.c b/test/unit/retained.c index aa9f6847..340f2d38 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -144,7 +144,6 @@ TEST_BEGIN(test_retained) { arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); size_t usable = 0; - size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); @@ -158,7 +157,6 @@ TEST_BEGIN(test_retained) { "Excessive retained memory " "(%#zx[+%#zx] > %#zx)", usable, psz_usable, allocated); - fragmented += psz_fragmented; usable += psz_usable; } } From 5a858c64d6f049c64c11baf907ab8655e6ed72a3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 2 Jun 2023 15:15:37 -0700 Subject: [PATCH 092/395] Reduce the memory overhead of sampled small 
allocations Previously, small allocations which were sampled as part of heap profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory usage becomes problematic when the page size is increased, as noted in #2358. Small allocations are now rounded up to the nearest multiple of `PAGE` instead, reducing the memory overhead by a factor of 4 in the most extreme cases. --- include/jemalloc/internal/arena_externs.h | 9 +-- include/jemalloc/internal/arena_inlines_b.h | 11 ++- .../internal/jemalloc_internal_inlines_c.h | 69 +++++++++++++----- include/jemalloc/internal/pages.h | 3 + include/jemalloc/internal/prof_inlines.h | 9 +-- include/jemalloc/internal/prof_types.h | 8 +++ include/jemalloc/internal/safety_check.h | 40 +++++++++-- include/jemalloc/internal/sz.h | 15 ++++ include/jemalloc/internal/tcache_inlines.h | 2 +- src/arena.c | 64 +++++++++-------- src/jemalloc.c | 72 ++++++++++--------- src/pages.c | 2 +- 12 files changed, 206 insertions(+), 98 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3821233f..2d82ad8f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -65,10 +65,11 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); + szind_t ind, bool zero, bool slab); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); + size_t alignment, bool zero, bool slab, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, + size_t bumped_usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); @@ -81,7 +82,7 @@ void arena_dalloc_small(tsdn_t *tsdn, void 
*ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); ehooks_t *arena_get_ehooks(arena_t *arena); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index bf25a31c..420a62b2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -182,23 +182,22 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) { + bool slab, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); if (likely(tcache != NULL)) { - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } - if (likely(size <= tcache_maxclass)) { + } else if (likely(size <= tcache_maxclass)) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } /* (size > tcache_maxclass) case falls through. 
*/ - assert(size > tcache_maxclass); } - return arena_malloc_hard(tsdn, arena, size, ind, zero); + return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 206f1400..ae9cb0c2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -52,10 +52,12 @@ isalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_internal, arena_t *arena, bool slow_path) { +iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena, + bool slow_path) { void *ret; + assert(!slab || sz_can_use_slab(size)); /* slab && large is illegal */ assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { @@ -63,13 +65,21 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, WITNESS_RANK_CORE, 0); } - ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } return ret; } +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + bool slab = sz_can_use_slab(size); + return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache, + is_internal, arena, slow_path); +} + JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { return iallocztm(tsd_tsdn(tsd), size, ind, zero, 
tcache_get(tsd), false, @@ -77,10 +87,11 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { } JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena) { +ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; + assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */ assert(usize != 0); assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); @@ -88,7 +99,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -96,12 +107,26 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, return ret; } +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, + sz_can_use_slab(usize), tcache, is_internal, arena); +} + JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); } +JEMALLOC_ALWAYS_INLINE void * +ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, false, arena); +} + JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t 
usize, size_t alignment, bool zero) { return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, @@ -146,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -157,7 +182,8 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); if (p == NULL) { return NULL; } @@ -184,8 +210,9 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * passed-around anywhere. */ JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) +iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) { assert(ptr != NULL); assert(size != 0); @@ -199,18 +226,28 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * and copy. 
*/ return iralloct_realign(tsdn, ptr, oldsize, size, alignment, - zero, tcache, arena, hook_args); + zero, slab, tcache, arena, hook_args); } return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache, hook_args); + slab, tcache, hook_args); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) +{ + bool slab = sz_can_use_slab(usize); + return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero, + slab, tcache, arena, hook_args); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, hook_ralloc_args_t *hook_args) { - return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL, hook_args); + size_t usize, bool zero, hook_ralloc_args_t *hook_args) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, usize, + zero, tcache_get(tsd), NULL, hook_args); } JEMALLOC_ALWAYS_INLINE bool @@ -405,7 +442,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { JEMALLOC_ALWAYS_INLINE bool prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; + return ((uintptr_t)ptr & PROF_SAMPLE_ALIGNMENT_MASK) == 0; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index ad1f606a..361de587 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H +/* Actual operating system page size, detected during bootstrap, <= PAGE. */ +extern size_t os_page; + /* Page size. LG_PAGE is determined by the configure script. 
*/ #ifdef PAGE_MASK # undef PAGE_MASK diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index b74b115c..b5273010 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -239,14 +239,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } JEMALLOC_ALWAYS_INLINE size_t -prof_sample_align(size_t orig_align) { +prof_sample_align(size_t usize, size_t orig_align) { /* - * Enforce page alignment, so that sampled allocations can be identified + * Enforce alignment, so that sampled allocations can be identified * w/o metadata lookup. */ assert(opt_prof); - return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : - orig_align; + return (orig_align < PROF_SAMPLE_ALIGNMENT && + (sz_can_use_slab(usize) || opt_cache_oblivious)) ? + PROF_SAMPLE_ALIGNMENT : orig_align; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 104f7e61..046ea204 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -80,4 +80,12 @@ typedef struct prof_recent_s prof_recent_t; /* Thread name storage size limit. */ #define PROF_THREAD_NAME_MAX_LEN 16 +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. 
+ */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 900cfa55..7854c1bf 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -3,6 +3,8 @@ #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 +#include "jemalloc/internal/pages.h" + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); @@ -12,22 +14,50 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +#define REDZONE_SIZE ((size_t) 32) +#define REDZONE_FILL_VALUE 0xBC + +/* + * Normally the redzone extends `REDZONE_SIZE` bytes beyond the end of + * the allocation. However, we don't let the redzone extend onto another + * OS page because this would impose additional overhead if that page was + * not already resident in memory. + */ +JEMALLOC_ALWAYS_INLINE const unsigned char * +compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { + const unsigned char *ptr = (const unsigned char *) _ptr; + const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? + &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; + const unsigned char *page_end = (const unsigned char *) + ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page); + return redzone_end < page_end ? 
redzone_end : page_end; +} + JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { - assert(usize < bumped_usize); - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - *((unsigned char *)ptr + i) = 0xBC; + assert(usize <= bumped_usize); + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (unsigned char *curr = &((unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + *curr = REDZONE_FILL_VALUE; } } JEMALLOC_ALWAYS_INLINE void safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) { - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (const unsigned char *curr= &((const unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + if (unlikely(*curr != REDZONE_FILL_VALUE)) { safety_check_fail("Use after free error\n"); } } } +#undef REDZONE_SIZE +#undef REDZONE_FILL_VALUE + #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3c0fc1da..a799cea9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -365,6 +365,21 @@ sz_sa2u(size_t size, size_t alignment) { return usize; } +/* + * Under normal circumstances, whether or not to use a slab + * to satisfy an allocation depends solely on the allocation's + * effective size. However, this is *not* the case when an allocation + * is sampled for profiling, in which case you *must not* use a slab + * regardless of the effective size. Thus `sz_can_use_slab` is called + * on the common path, but there exist `*_explicit_slab` variants of + * several functions for handling the aforementioned case of + * sampled allocations. 
+ */ +JEMALLOC_ALWAYS_INLINE bool +sz_can_use_slab(size_t size) { + return size <= SC_SMALL_MAXCLASS; +} + size_t sz_psz_quantize_floor(size_t size); size_t sz_psz_quantize_ceil(size_t size); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2634f145..2b8db0a3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -60,7 +60,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(tcache_small_bin_disabled(binind, bin))) { /* stats and zero are handled directly by the arena. */ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, - binind, zero); + binind, zero, /* slab */ true); } tcache_bin_flush_stashed(tsd, tcache, bin, binind, /* is_small */ true); diff --git a/src/arena.c b/src/arena.c index 3b151b77..b154b7a5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1191,7 +1191,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero) { + bool zero, bool slab) { assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { @@ -1201,18 +1201,19 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return NULL; } - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); + } else { + return large_malloc(tsdn, arena, sz_index2size(ind), zero); } - return large_malloc(tsdn, arena, sz_index2size(ind), zero); } void * arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) { - void *ret; - - if (usize <= SC_SMALL_MAXCLASS) { + bool zero, bool slab, tcache_t *tcache) { + if (slab) { + assert(sz_can_use_slab(usize)); /* Small; alignment doesn't require special slab placement. 
*/ /* usize should be a result of sz_sa2u() */ @@ -1223,27 +1224,26 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ assert(alignment <= PAGE); - ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + return arena_malloc(tsdn, arena, usize, sz_size2index(usize), + zero, slab, tcache, true); } else { if (likely(alignment <= CACHELINE)) { - ret = large_malloc(tsdn, arena, usize, zero); + return large_malloc(tsdn, arena, usize, zero); } else { - ret = large_palloc(tsdn, arena, usize, alignment, zero); + return large_palloc(tsdn, arena, usize, alignment, zero); } } - return ret; } void -arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); - assert(usize <= SC_SMALL_MAXCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); + assert(sz_can_use_slab(usize)); if (config_opt_safety_checks) { - safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + safety_check_set_redzone(ptr, usize, bumped_usize); } edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -1259,13 +1259,19 @@ static size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); + size_t usize = isalloc(tsdn, ptr); + size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); + assert(bumped_usize <= SC_LARGE_MINCLASS && + PAGE_CEILING(bumped_usize) == bumped_usize); + assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); + szind_t szind = sz_size2index(bumped_usize); - edata_szind_set(edata, SC_NBINS); - emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); - return 
SC_LARGE_MINCLASS; + return bumped_usize; } void @@ -1282,10 +1288,10 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, * Currently, we only do redzoning for small sampled * allocations. */ - assert(bumped_usize == SC_LARGE_MINCLASS); safety_check_verify_redzone(ptr, usize, bumped_usize); } - if (bumped_usize <= tcache_maxclass && tcache != NULL) { + if (bumped_usize >= SC_LARGE_MINCLASS && + bumped_usize <= tcache_maxclass && tcache != NULL) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { @@ -1443,28 +1449,30 @@ done: static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) { + size_t alignment, bool zero, bool slab, tcache_t *tcache) { if (alignment == 0) { return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + zero, slab, tcache, true); } usize = sz_sa2u(usize, alignment); if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - return ipalloct(tsdn, usize, alignment, zero, tcache, arena); + return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); } void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args) { size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } - if (likely(usize <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(usize)); /* Try to avoid moving the allocation. */ UNUSED size_t newsize; if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, @@ -1488,7 +1496,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, * object. In that case, fall back to allocating new space and copying. 
*/ void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, tcache); + zero, slab, tcache); if (ret == NULL) { return NULL; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 88559be0..4e4e4bee 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2360,7 +2360,7 @@ arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, - size_t size, size_t usize, szind_t ind) { + size_t size, size_t usize, szind_t ind, bool slab) { /* Fill in the tcache. */ tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); @@ -2372,12 +2372,12 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, } if (unlikely(dopts->alignment != 0)) { - return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, - dopts->zero, tcache, arena); + return ipalloct_explicit_slab(tsd_tsdn(tsd), usize, + dopts->alignment, dopts->zero, slab, tcache, arena); } - return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false, - arena, sopts->slow); + return iallocztm_explicit_slab(tsd_tsdn(tsd), size, ind, dopts->zero, + slab, tcache, false, arena, sopts->slow); } JEMALLOC_ALWAYS_INLINE void * @@ -2385,28 +2385,26 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t usize, szind_t ind) { void *ret; + dopts->alignment = prof_sample_align(usize, dopts->alignment); /* - * For small allocations, sampling bumps the usize. If so, we allocate - * from the ind_large bucket. + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. */ - szind_t ind_large; - - dopts->alignment = prof_sample_align(dopts->alignment); - if (usize <= SC_SMALL_MAXCLASS) { - assert(((dopts->alignment == 0) ? 
- sz_s2u(SC_LARGE_MINCLASS) : - sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) - == SC_LARGE_MINCLASS); - ind_large = sz_size2index(SC_LARGE_MINCLASS); - size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS); + if (sz_can_use_slab(usize)) { + assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0); + size_t bumped_usize = sz_sa2u(usize, dopts->alignment); + szind_t bumped_ind = sz_size2index(bumped_usize); + dopts->tcache_ind = TCACHE_IND_NONE; ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, - bumped_usize, ind_large); + bumped_usize, bumped_ind, /* slab */ false); if (unlikely(ret == NULL)) { return NULL; } - arena_prof_promote(tsd_tsdn(tsd), ret, usize); + arena_prof_promote(tsd_tsdn(tsd), ret, usize, bumped_usize); } else { - ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); + ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind, + /* slab */ false); } assert(prof_sample_aligned(ret)); @@ -2532,9 +2530,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); + alloc_ctx.slab = sz_can_use_slab(usize); allocation = imalloc_no_sample( - sopts, dopts, tsd, usize, usize, ind); + sopts, dopts, tsd, usize, usize, ind, + alloc_ctx.slab); } else if ((uintptr_t)tctx > (uintptr_t)1U) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); @@ -2551,7 +2550,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, - ind); + ind, sz_can_use_slab(usize)); if (unlikely(allocation == NULL)) { goto label_oom; } @@ -3314,18 +3313,25 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, return NULL; } - alignment = prof_sample_align(alignment); - if (usize <= SC_SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, - SC_LARGE_MINCLASS, alignment, zero, tcache, - arena, 
hook_args); + alignment = prof_sample_align(usize, alignment); + /* + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. + */ + if (sz_can_use_slab(usize)) { + size_t bumped_usize = sz_sa2u(usize, alignment); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, + bumped_usize, alignment, zero, /* slab */ false, + tcache, arena, hook_args); if (p == NULL) { return NULL; } - arena_prof_promote(tsdn, p, usize); + arena_prof_promote(tsdn, p, usize, bumped_usize); } else { - p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena, hook_args); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, usize, + alignment, zero, /* slab */ false, tcache, arena, + hook_args); } assert(prof_sample_aligned(p)); @@ -3348,7 +3354,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, tcache, arena, hook_args); + usize, zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx); @@ -3407,7 +3413,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena, &hook_args); + usize, zero, tcache, arena, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } diff --git a/src/pages.c b/src/pages.c index 2d5b8164..41bbef57 100644 --- a/src/pages.c +++ b/src/pages.c @@ -33,7 +33,7 @@ /* Data. */ /* Actual operating system page size, detected during bootstrap, <= PAGE. 
*/ -static size_t os_page; +size_t os_page; #ifndef _WIN32 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) From ebd7e99f5c1bd486d9eee5f10a48a92585efc1e3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 12 Jun 2023 14:13:17 -0700 Subject: [PATCH 093/395] Add a test-case for small profiled allocations Validate that small allocations (i.e. those with `size <= SC_SMALL_MAXCLASS`) which are sampled for profiling maintain the expected invariants even though they now take up less space. --- Makefile.in | 1 + test/unit/prof_small.c | 78 +++++++++++++++++++++++++++++++++++++++++ test/unit/prof_small.sh | 6 ++++ 3 files changed, 85 insertions(+) create mode 100644 test/unit/prof_small.c create mode 100644 test/unit/prof_small.sh diff --git a/Makefile.in b/Makefile.in index 450abeb4..a0131558 100644 --- a/Makefile.in +++ b/Makefile.in @@ -256,6 +256,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_small.c \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c new file mode 100644 index 00000000..4a982b70 --- /dev/null +++ b/test/unit/prof_small.c @@ -0,0 +1,78 @@ +#include "test/jemalloc_test.h" + +static void assert_small_allocation_sampled(void *ptr, size_t size) { + assert_ptr_not_null(ptr, "Unexpected malloc failure"); + assert_zu_le(size, SC_SMALL_MAXCLASS, "Unexpected large size class"); + edata_t *edata = emap_edata_lookup(TSDN_NULL, &arena_emap_global, ptr); + assert_ptr_not_null(edata, "Unable to find edata for allocation"); + expect_false(edata_slab_get(edata), + "Sampled small allocations should not be placed on slabs"); + expect_ptr_eq(edata_base_get(edata), ptr, + "Sampled allocations should be page-aligned"); + expect_zu_eq(edata_usize_get(edata), size, + "Edata usize did not match requested size"); + 
expect_zu_eq(edata_size_get(edata), PAGE_CEILING(size) + sz_large_pad, + "Edata actual size was not a multiple of PAGE"); + prof_tctx_t *prof_tctx = edata_prof_tctx_get(edata); + expect_ptr_not_null(prof_tctx, "Edata had null prof_tctx"); + expect_ptr_not_null(prof_tctx->tdata, + "Edata had null prof_tdata despite being sampled"); +} + +TEST_BEGIN(test_profile_small_allocations) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + free(ptr); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_growing) { + test_skip_if(!config_prof); + + void *ptr = NULL; + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + ptr = realloc(ptr, size); + assert_small_allocation_sampled(ptr, size); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_shrinking) { + test_skip_if(!config_prof); + + void *ptr = NULL; + for (szind_t index = SC_NBINS; index-- > 0;) { + size_t size = sz_index2size(index); + ptr = realloc(ptr, size); + assert_small_allocation_sampled(ptr, size); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_same_size_class) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + ptr = realloc(ptr, size - 1); + assert_small_allocation_sampled(ptr, size); + free(ptr); + } +} +TEST_END + +int +main(void) { + return test(test_profile_small_allocations, + test_profile_small_reallocations_growing, + test_profile_small_reallocations_shrinking, + test_profile_small_reallocations_same_size_class); +} diff --git a/test/unit/prof_small.sh b/test/unit/prof_small.sh new file mode 100644 index 00000000..d14cb8c5 --- /dev/null +++ b/test/unit/prof_small.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; 
then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + From 602edd75664e2a2ef3063d9b3bd42d1f81a1be2b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Jul 2023 13:33:34 -0700 Subject: [PATCH 094/395] Enabled -Wstrict-prototypes and fixed warnings. --- configure.ac | 1 + include/jemalloc/internal/ehooks.h | 2 +- include/jemalloc/internal/hook.h | 2 +- include/jemalloc/internal/hpa.h | 2 +- include/jemalloc/internal/prof_data.h | 2 +- include/jemalloc/internal/prof_externs.h | 8 ++++---- include/jemalloc/internal/prof_inlines.h | 2 +- include/jemalloc/internal/prof_recent.h | 4 ++-- include/jemalloc/internal/prof_sys.h | 6 +++--- include/jemalloc/internal/san_bump.h | 2 +- include/jemalloc/internal/spin.h | 2 +- include/jemalloc/internal/test_hooks.h | 4 ++-- include/jemalloc/internal/tsd.h | 2 +- src/ehooks.c | 2 +- src/hook.c | 4 ++-- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- src/pages.c | 2 +- src/prof.c | 8 ++++---- src/prof_data.c | 2 +- src/prof_recent.c | 8 ++++---- src/prof_sys.c | 6 +++--- src/test_hooks.c | 4 ++-- src/tsd.c | 2 +- test/analyze/sizes.c | 2 +- test/include/test/bench.h | 4 ++-- test/include/test/test.h | 2 +- test/src/test.c | 6 +++--- test/stress/batch_alloc.c | 16 ++++++++-------- test/unit/double_free.c | 4 ++-- test/unit/fork.c | 2 +- test/unit/hook.c | 6 +++--- test/unit/hpa_background_thread.c | 4 ++-- test/unit/junk.c | 2 +- test/unit/prof_log.c | 12 ++++++------ test/unit/prof_mdump.c | 2 +- test/unit/prof_recent.c | 4 ++-- test/unit/test_hooks.c | 2 +- test/unit/zero_realloc_abort.c | 3 ++- test/unit/zero_realloc_alloc.c | 4 ++-- test/unit/zero_realloc_free.c | 2 +- test/unit/zero_reallocs.c | 2 +- 42 files changed, 82 insertions(+), 80 deletions(-) diff --git a/configure.ac b/configure.ac index 687b221c..9686ac53 100644 --- a/configure.ac +++ b/configure.ac @@ -262,6 +262,7 @@ if test "x$GCC" = "xyes" ; then dnl This one too. 
JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-Wno-missing-attributes]) + JE_CFLAGS_ADD([-Wstrict-prototypes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 8d9513e2..d583c521 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -53,7 +53,7 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif -bool ehooks_default_split_impl(); +bool ehooks_default_split_impl(void); /* * Merge is the only default extent hook we declare -- see the comment in * ehooks_merge. diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index af03d2f5..41157207 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -144,7 +144,7 @@ struct hook_ralloc_args_s { * Returns an opaque handle to be used when removing the hook. NULL means that * we couldn't install the hook. */ -bool hook_boot(); +bool hook_boot(void); void *hook_install(tsdn_t *tsdn, hooks_t *hooks); /* Uninstalls the hook with the handle previously returned from hook_install. */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 0b3c76c6..01fe3166 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -143,7 +143,7 @@ struct hpa_shard_s { * is not necessarily a guarantee that it backs its allocations by hugepages, * just that it can function properly given the system it's running on. 
*/ -bool hpa_supported(); +bool hpa_supported(void); bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index c4286b51..016b6507 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -19,7 +19,7 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_unbias_map_init(); +void prof_unbias_map_init(void); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 412378a2..cce5c8f5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -51,16 +51,16 @@ extern size_t lg_prof_sample; extern bool prof_booted; void prof_backtrace_hook_set(prof_backtrace_hook_t hook); -prof_backtrace_hook_t prof_backtrace_hook_get(); +prof_backtrace_hook_t prof_backtrace_hook_get(void); void prof_dump_hook_set(prof_dump_hook_t hook); -prof_dump_hook_t prof_dump_hook_get(); +prof_dump_hook_t prof_dump_hook_get(void); void prof_sample_hook_set(prof_sample_hook_t hook); -prof_sample_hook_t prof_sample_hook_get(); +prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); -prof_sample_free_hook_t prof_sample_free_hook_get(); +prof_sample_free_hook_t prof_sample_free_hook_get(void); /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git 
a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index b5273010..c0783fc1 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -7,7 +7,7 @@ #include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void -prof_active_assert() { +prof_active_assert(void) { cassert(config_prof); /* * If opt_prof is off, then prof_active must always be off, regardless diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index df410236..959e336b 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -7,7 +7,7 @@ extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); -bool prof_recent_init(); +bool prof_recent_init(void); void edata_prof_recent_alloc_init(edata_t *edata); /* Used in unit tests. 
*/ @@ -16,7 +16,7 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); -ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_read(void); ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3d25a429..0eb50788 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,8 +6,8 @@ extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); -void prof_hooks_init(); -void prof_unwind_init(); +void prof_hooks_init(void); +void prof_unwind_init(void); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); @@ -24,7 +24,7 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(); +typedef int (prof_dump_open_maps_t)(void); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 8ec4a710..0a8e76e9 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -20,7 +20,7 @@ struct san_bump_alloc_s { }; static inline bool -san_bump_enabled() { +san_bump_enabled(void) { /* * We enable san_bump allocator only when it's possible to break up a * mapping and unmap a part of it (maps_coalesce). 
This is needed to diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 22804c68..6940f15e 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -8,7 +8,7 @@ typedef struct { } spin_t; static inline void -spin_cpu_spinwait() { +spin_cpu_spinwait(void) { # if HAVE_CPU_SPINWAIT CPU_SPINWAIT; # else diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 3d530b5c..2b90afe1 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,8 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H -extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); -extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); +extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); +extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); #if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) # define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c6bf28fc..3dd52247 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -177,7 +177,7 @@ void tsd_postfork_child(tsd_t *tsd); */ void tsd_global_slow_inc(tsdn_t *tsdn); void tsd_global_slow_dec(tsdn_t *tsdn); -bool tsd_global_slow(); +bool tsd_global_slow(void); #define TSD_MIN_INIT_STATE_MAX_FETCHED (128) diff --git a/src/ehooks.c b/src/ehooks.c index 383e9de6..da759215 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -159,7 +159,7 @@ ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, #endif bool -ehooks_default_split_impl() { +ehooks_default_split_impl(void) { if (!maps_coalesce) { /* * Without retain, only whole regions can be purged (required by diff --git a/src/hook.c b/src/hook.c index 493edbbe..77a988d7 100644 --- a/src/hook.c +++ b/src/hook.c @@ -19,7 +19,7 @@ static seq_hooks_t 
hooks[HOOK_MAX]; static malloc_mutex_t hooks_mu; bool -hook_boot() { +hook_boot(void) { return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, malloc_mutex_rank_exclusive); } @@ -100,7 +100,7 @@ for (int for_each_hook_counter = 0; \ } static bool * -hook_reentrantp() { +hook_reentrantp(void) { /* * We prevent user reentrancy within hooks. This is basically just a * thread-local bool that triggers an early-exit. diff --git a/src/hpa.c b/src/hpa.c index 7462025c..ee41994f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -24,7 +24,7 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool -hpa_supported() { +hpa_supported(void) { #ifdef _WIN32 /* * At least until the API and implementation is somewhat settled, we diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e4e4bee..7771a731 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -766,7 +766,7 @@ malloc_ncpus(void) { * Since otherwise tricky things is possible with percpu arenas in use. 
*/ static bool -malloc_cpu_count_is_deterministic() +malloc_cpu_count_is_deterministic(void) { #ifdef _WIN32 return true; @@ -1807,7 +1807,7 @@ malloc_init_hard_needed(void) { } static bool -malloc_init_hard_a0_locked() { +malloc_init_hard_a0_locked(void) { malloc_initializer = INITIALIZER; JEMALLOC_DIAGNOSTIC_PUSH diff --git a/src/pages.c b/src/pages.c index 41bbef57..e70c6e92 100644 --- a/src/pages.c +++ b/src/pages.c @@ -66,7 +66,7 @@ static int madvise_dont_need_zeros_is_faulty = -1; * * [1]: https://patchwork.kernel.org/patch/10576637/ */ -static int madvise_MADV_DONTNEED_zeroes_pages() +static int madvise_MADV_DONTNEED_zeroes_pages(void) { size_t size = PAGE; diff --git a/src/prof.c b/src/prof.c index 832aa528..e958349e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -562,7 +562,7 @@ prof_backtrace_hook_set(prof_backtrace_hook_t hook) { } prof_backtrace_hook_t -prof_backtrace_hook_get() { +prof_backtrace_hook_get(void) { return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, ATOMIC_ACQUIRE); } @@ -573,7 +573,7 @@ prof_dump_hook_set(prof_dump_hook_t hook) { } prof_dump_hook_t -prof_dump_hook_get() { +prof_dump_hook_get(void) { return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, ATOMIC_ACQUIRE); } @@ -584,7 +584,7 @@ prof_sample_hook_set(prof_sample_hook_t hook) { } prof_sample_hook_t -prof_sample_hook_get() { +prof_sample_hook_get(void) { return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, ATOMIC_ACQUIRE); } @@ -595,7 +595,7 @@ prof_sample_free_hook_set(prof_sample_free_hook_t hook) { } prof_sample_free_hook_t -prof_sample_free_hook_get() { +prof_sample_free_hook_get(void) { return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, ATOMIC_ACQUIRE); } diff --git a/src/prof_data.c b/src/prof_data.c index c33668ee..d52522b0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -503,7 +503,7 @@ prof_double_uint64_cast(double d) { } #endif -void prof_unbias_map_init() { +void prof_unbias_map_init(void) { /* See the comment in 
prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { diff --git a/src/prof_recent.c b/src/prof_recent.c index e5b3fb17..b5639b4c 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -16,13 +16,13 @@ prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */ static void -prof_recent_alloc_max_init() { +prof_recent_alloc_max_init(void) { atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, ATOMIC_RELAXED); } static inline ssize_t -prof_recent_alloc_max_get_no_lock() { +prof_recent_alloc_max_get_no_lock(void) { return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED); } @@ -403,7 +403,7 @@ label_rollback: } ssize_t -prof_recent_alloc_max_ctl_read() { +prof_recent_alloc_max_ctl_read(void) { cassert(config_prof); /* Don't bother to acquire the lock. */ return prof_recent_alloc_max_get_no_lock(); @@ -582,7 +582,7 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { #undef PROF_RECENT_PRINT_BUFSIZE bool -prof_recent_init() { +prof_recent_init(void) { cassert(config_prof); prof_recent_alloc_max_init(); diff --git a/src/prof_sys.c b/src/prof_sys.c index 3cbb3a85..467394a5 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -428,7 +428,7 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { } void -prof_hooks_init() { +prof_hooks_init(void) { prof_backtrace_hook_set(&prof_backtrace_impl); prof_dump_hook_set(NULL); prof_sample_hook_set(NULL); @@ -436,7 +436,7 @@ prof_hooks_init() { } void -prof_unwind_init() { +prof_unwind_init(void) { #ifdef JEMALLOC_PROF_LIBGCC /* * Cause the backtracing machinery to allocate its internal @@ -596,7 +596,7 @@ prof_open_maps_internal(const char *format, ...) 
{ #endif static int -prof_dump_open_maps_impl() { +prof_dump_open_maps_impl(void) { int mfd; cassert(config_prof); diff --git a/src/test_hooks.c b/src/test_hooks.c index ace00d9c..40621199 100644 --- a/src/test_hooks.c +++ b/src/test_hooks.c @@ -6,7 +6,7 @@ * from outside the generated library, so that we can use them in test code. */ JEMALLOC_EXPORT -void (*test_hooks_arena_new_hook)() = NULL; +void (*test_hooks_arena_new_hook)(void) = NULL; JEMALLOC_EXPORT -void (*test_hooks_libc_hook)() = NULL; +void (*test_hooks_libc_hook)(void) = NULL; diff --git a/src/tsd.c b/src/tsd.c index cef7ba58..a4db8e36 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -148,7 +148,7 @@ tsd_local_slow(tsd_t *tsd) { } bool -tsd_global_slow() { +tsd_global_slow(void) { return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; } diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index 44c9de5e..a48c4f48 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -29,7 +29,7 @@ do_print(const char *name, size_t sz_bytes) { } int -main() { +main(void) { #define P(type) \ do_print(#type, sizeof(type)) P(arena_t); diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 29c6801f..e2a9fc09 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -40,8 +40,8 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, return; } - time_func(&timer_a, nwarmup, niter, (void (*)())func_a); - time_func(&timer_b, nwarmup, niter, (void (*)())func_b); + time_func(&timer_a, nwarmup, niter, (void (*)(void))func_a); + time_func(&timer_b, nwarmup, niter, (void (*)(void))func_b); uint64_t usec_a = timer_usec(&timer_a); char buf_a[FMT_NSECS_BUF_SIZE]; diff --git a/test/include/test/test.h b/test/include/test/test.h index 54610dab..183dfc00 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -569,7 +569,7 @@ label_test_end: \ } \ } while (0) -bool test_is_reentrant(); +bool test_is_reentrant(void); void test_skip(const char *format, 
...) JEMALLOC_FORMAT_PRINTF(1, 2); void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); diff --git a/test/src/test.c b/test/src/test.c index 4cd803e5..e26dbfd4 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -35,7 +35,7 @@ reentrancy_t_str(reentrancy_t r) { } static void -do_hook(bool *hook_ran, void (**hook)()) { +do_hook(bool *hook_ran, void (**hook)(void)) { *hook_ran = true; *hook = NULL; @@ -47,12 +47,12 @@ do_hook(bool *hook_ran, void (**hook)()) { } static void -libc_reentrancy_hook() { +libc_reentrancy_hook(void) { do_hook(&libc_hook_ran, &test_hooks_libc_hook); } static void -arena_new_reentrancy_hook() { +arena_new_reentrancy_hook(void) { do_hook(&arena_new_hook_ran, &test_hooks_arena_new_hook); } diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 427e1cba..6b973bb1 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -124,12 +124,12 @@ compare_with_free(size_t batch, size_t iter, } static void -batch_alloc_without_free_tiny() { +batch_alloc_without_free_tiny(void) { batch_alloc_without_free(TINY_BATCH); } static void -item_alloc_without_free_tiny() { +item_alloc_without_free_tiny(void) { item_alloc_without_free(TINY_BATCH); } @@ -140,12 +140,12 @@ TEST_BEGIN(test_tiny_batch_without_free) { TEST_END static void -batch_alloc_with_free_tiny() { +batch_alloc_with_free_tiny(void) { batch_alloc_with_free(TINY_BATCH); } static void -item_alloc_with_free_tiny() { +item_alloc_with_free_tiny(void) { item_alloc_with_free(TINY_BATCH); } @@ -156,12 +156,12 @@ TEST_BEGIN(test_tiny_batch_with_free) { TEST_END static void -batch_alloc_without_free_huge() { +batch_alloc_without_free_huge(void) { batch_alloc_without_free(HUGE_BATCH); } static void -item_alloc_without_free_huge() { +item_alloc_without_free_huge(void) { item_alloc_without_free(HUGE_BATCH); } @@ -172,12 +172,12 @@ TEST_BEGIN(test_huge_batch_without_free) { TEST_END static void -batch_alloc_with_free_huge() { +batch_alloc_with_free_huge(void) { 
batch_alloc_with_free(HUGE_BATCH); } static void -item_alloc_with_free_huge() { +item_alloc_with_free_huge(void) { item_alloc_with_free(HUGE_BATCH); } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index f1e50cd2..b6ae8f75 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -16,13 +16,13 @@ test_double_free_pre(void) { } static void -test_double_free_post() { +test_double_free_post(void) { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } static bool -tcache_enabled() { +tcache_enabled(void) { bool enabled; size_t sz = sizeof(enabled); assert_d_eq( diff --git a/test/unit/fork.c b/test/unit/fork.c index 4137423f..447eb191 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -95,7 +95,7 @@ do_fork_thd(void *arg) { #ifndef _WIN32 static void -do_test_fork_multithreaded() { +do_test_fork_multithreaded(void) { thd_t child; thd_create(&child, do_fork_thd, NULL); do_fork_thd(NULL); diff --git a/test/unit/hook.c b/test/unit/hook.c index 36dbd269..f2a7f190 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -14,7 +14,7 @@ static uintptr_t arg_args_raw[4]; static int call_count = 0; static void -reset_args() { +reset_args(void) { arg_extra = NULL; arg_type = 12345; arg_result = NULL; @@ -40,7 +40,7 @@ alloc_free_size(size_t sz) { * allocation scenarios. */ static void -be_reentrant() { +be_reentrant(void) { /* Let's make sure the tcache is non-empty if enabled. 
*/ alloc_free_size(1); alloc_free_size(1024); @@ -77,7 +77,7 @@ expect_args_raw(uintptr_t *args_raw_expected, int nargs) { } static void -reset() { +reset(void) { call_count = 0; reset_args(); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 81c25612..774ccb4a 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -2,7 +2,7 @@ #include "test/sleep.h" static void -sleep_for_background_thread_interval() { +sleep_for_background_thread_interval(void) { /* * The sleep interval set in our .sh file is 50ms. So it likely will * run if we sleep for four times that. @@ -11,7 +11,7 @@ sleep_for_background_thread_interval() { } static unsigned -create_arena() { +create_arena(void) { unsigned arena_ind; size_t sz; diff --git a/test/unit/junk.c b/test/unit/junk.c index 543092f1..6c5b8beb 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -7,7 +7,7 @@ static void *last_junked_ptr; static size_t last_junked_usize; static void -reset() { +reset(void) { ptr_ind = 0; last_junked_ptr = NULL; last_junked_usize = 0; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 5ff208e2..a32fdd0b 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,11 +4,11 @@ #define N_PARAM 100 #define N_THREADS 10 -static void expect_rep() { +static void expect_rep(void) { expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void expect_log_empty() { +static void expect_log_empty(void) { expect_zu_eq(prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); expect_zu_eq(prof_log_thr_count(), 0, @@ -19,7 +19,7 @@ static void expect_log_empty() { void *buf[N_PARAM]; -static void f() { +static void f(void) { int i; for (i = 0; i < N_PARAM; i++) { buf[i] = malloc(100); @@ -91,18 +91,18 @@ TEST_BEGIN(test_prof_log_many_threads) { } TEST_END -static void f3() { +static void f3(void) { void *p = malloc(100); free(p); } -static void f1() { +static void f1(void) { void *p = malloc(100); 
f3(); free(p); } -static void f2() { +static void f2(void) { void *p = malloc(100); free(p); } diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index 75b3a515..bcbb961a 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -129,7 +129,7 @@ TEST_BEGIN(test_mdump_output_error) { TEST_END static int -prof_dump_open_maps_error() { +prof_dump_open_maps_error(void) { return -1; } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 2cf699d8..24ee6f42 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -9,7 +9,7 @@ const char *test_thread_name = "test_thread"; /* Invariant before and after every test (when config_prof is on) */ static void -confirm_prof_setup() { +confirm_prof_setup(void) { /* Options */ assert_true(opt_prof, "opt_prof not on"); assert_true(opt_prof_active, "opt_prof_active not on"); @@ -356,7 +356,7 @@ test_dump_write_cb(void *not_used, const char *str) { } static void -call_dump() { +call_dump(void) { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 8cd2b3bb..41e7bf35 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -3,7 +3,7 @@ static bool hook_called = false; static void -hook() { +hook(void) { hook_called = true; } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index a880d104..f014cdc2 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -4,7 +4,8 @@ static bool abort_called = false; -void set_abort_called() { +void set_abort_called(const char *message) { + (void)message; abort_called = true; }; diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c index 65e07bdb..6954818c 100644 --- a/test/unit/zero_realloc_alloc.c +++ b/test/unit/zero_realloc_alloc.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static uint64_t -allocated() { +allocated(void) { 
if (!config_stats) { return 0; } @@ -13,7 +13,7 @@ allocated() { } static uint64_t -deallocated() { +deallocated(void) { if (!config_stats) { return 0; } diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index baed86c9..277f219d 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static uint64_t -deallocated() { +deallocated(void) { if (!config_stats) { return 0; } diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index 66c7a404..a9077222 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static size_t -zero_reallocs() { +zero_reallocs(void) { if (!config_stats) { return 0; } From e249d1a2a1eef5bd0b329f0575f9d952a5e73522 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Jul 2023 14:02:14 -0700 Subject: [PATCH 095/395] Remove unreachable code. --- include/jemalloc/internal/bit_util.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index bac59140..70fa4bc9 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -340,7 +340,6 @@ ffs_u32(uint32_t x) { #else #error No implementation for 32-bit ffs() #endif - return ffs_u(x); } static inline unsigned @@ -350,7 +349,6 @@ fls_u32(uint32_t x) { #else #error No implementation for 32-bit fls() #endif - return fls_u(x); } static inline uint64_t From 589c63b4244e60dcfe74861a2b110b545182216f Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 18 May 2023 10:22:58 -0700 Subject: [PATCH 096/395] Make eligible global variables `static` and/or `const` For better or worse, Jemalloc has a significant number of global variables. 
Making all eligible global variables `static` and/or `const` at least makes it slightly easier to reason about them, as these qualifications communicate to the programmer restrictions on their use without having to `grep` the whole codebase. --- include/jemalloc/internal/arena_externs.h | 3 +-- include/jemalloc/internal/base.h | 2 +- include/jemalloc/internal/extent_dss.h | 2 +- include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/jemalloc_internal_externs.h | 6 +++--- include/jemalloc/internal/nstime.h | 2 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pages.h | 2 +- src/arena.c | 4 ++-- src/base.c | 2 +- src/extent_dss.c | 2 +- src/hpa_hooks.c | 2 +- src/jemalloc.c | 8 ++++---- src/nstime.c | 2 +- src/pa.c | 2 +- src/pages.c | 2 +- src/prof.c | 8 ++++---- src/prof_log.c | 2 +- src/prof_sys.c | 2 -- src/stats.c | 4 ++-- test/unit/hpa.c | 2 +- 21 files changed, 30 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2d82ad8f..8e323639 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -18,11 +18,10 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; -extern const char *percpu_arena_mode_names[]; +extern const char *const percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; -extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 9b2c9fb1..23207563 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -23,7 +23,7 @@ typedef enum metadata_thp_mode_e metadata_thp_mode_t; #define METADATA_THP_DEFAULT metadata_thp_disabled extern metadata_thp_mode_t opt_metadata_thp; -extern const char *metadata_thp_mode_names[]; +extern const char *const 
metadata_thp_mode_names[]; /* Embedded at the beginning of every block of base-managed virtual memory. */ diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index e8f02ce2..38f04340 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -11,7 +11,7 @@ typedef enum { #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT "secondary" -extern const char *dss_prec_names[]; +extern const char *const dss_prec_names[]; extern const char *opt_dss; diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 4ea221cb..841f529e 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -12,6 +12,6 @@ struct hpa_hooks_s { uint64_t (*ms_since)(nstime_t *r_time); }; -extern hpa_hooks_t hpa_hooks_default; +extern const hpa_hooks_t hpa_hooks_default; #endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index d90f6ddb..b1e5bde9 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -22,8 +22,8 @@ extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; -extern void (*junk_free_callback)(void *ptr, size_t size); -extern void (*junk_alloc_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; @@ -31,7 +31,7 @@ extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; -extern const char *zero_realloc_mode_names[]; +extern const char *const zero_realloc_mode_names[]; 
extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 486e5cca..ad1ae532 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -56,7 +56,7 @@ enum prof_time_res_e { typedef enum prof_time_res_e prof_time_res_t; extern prof_time_res_t opt_prof_time_res; -extern const char *prof_time_res_mode_names[]; +extern const char *const prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4748a05b..c8aed932 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,7 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, - hpa_hooks_t *hpa_hooks); + const hpa_hooks_t *hpa_hooks); /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 361de587..c9d10ce2 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -102,7 +102,7 @@ typedef enum { #define THP_MODE_DEFAULT thp_mode_default extern thp_mode_t opt_thp; extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ -extern const char *thp_mode_names[]; +extern const char *const thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); diff --git a/src/arena.c b/src/arena.c index b154b7a5..a8890e57 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,7 +21,7 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS * Define names for both unininitialized and initialized phases, so that * options and mallctl processing are straightforward. 
*/ -const char *percpu_arena_mode_names[] = { +const char *const percpu_arena_mode_names[] = { "percpu", "phycpu", "disabled", @@ -37,7 +37,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; -pa_central_t arena_pa_central_global; +static pa_central_t arena_pa_central_global; div_info_t arena_binind_div_info[SC_NBINS]; diff --git a/src/base.c b/src/base.c index 7f4d6756..16f90495 100644 --- a/src/base.c +++ b/src/base.c @@ -22,7 +22,7 @@ static base_t *b0; metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; -const char *metadata_thp_mode_names[] = { +const char *const metadata_thp_mode_names[] = { "disabled", "auto", "always" diff --git a/src/extent_dss.c b/src/extent_dss.c index 9a35bacf..0b846296 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -10,7 +10,7 @@ const char *opt_dss = DSS_DEFAULT; -const char *dss_prec_names[] = { +const char *const dss_prec_names[] = { "disabled", "primary", "secondary", diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index ade581e8..6048f382 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -11,7 +11,7 @@ static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); -hpa_hooks_t hpa_hooks_default = { +const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, &hpa_hooks_unmap, &hpa_hooks_purge, diff --git a/src/jemalloc.c b/src/jemalloc.c index 7771a731..8de30279 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -121,7 +121,7 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -const char *zero_realloc_mode_names[] = { +const char *const zero_realloc_mode_names[] = { "alloc", "free", "abort", @@ -142,8 +142,8 @@ static void default_junk_free(void *ptr, size_t usize) { memset(ptr, junk_free_byte, usize); } -void (*junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; 
-void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; +void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; bool opt_utrace = false; bool opt_xmalloc = false; @@ -158,7 +158,7 @@ unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; /* Protects arenas initialization. */ -malloc_mutex_t arenas_lock; +static malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; diff --git a/src/nstime.c b/src/nstime.c index a1a53777..7fb9100e 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -228,7 +228,7 @@ nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; prof_time_res_t opt_prof_time_res = prof_time_res_default; -const char *prof_time_res_mode_names[] = { +const char *const prof_time_res_mode_names[] = { "default", "high", }; diff --git a/src/pa.c b/src/pa.c index 18c850d7..63eef2b5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -17,7 +17,7 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, - hpa_hooks_t *hpa_hooks) { + const hpa_hooks_t *hpa_hooks) { bool err; if (hpa) { err = hpa_central_init(¢ral->hpa, base, hpa_hooks); diff --git a/src/pages.c b/src/pages.c index e70c6e92..249d7c5b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -42,7 +42,7 @@ static int mmap_flags; #endif static bool os_overcommits; -const char *thp_mode_names[] = { +const char *const thp_mode_names[] = { "default", "always", "never", diff --git a/src/prof.c b/src/prof.c index e958349e..9986a329 100644 --- a/src/prof.c +++ b/src/prof.c @@ -73,16 +73,16 @@ static malloc_mutex_t next_thr_uid_mtx; bool prof_booted = false; /* Logically a prof_backtrace_hook_t. */ -atomic_p_t prof_backtrace_hook; +static atomic_p_t prof_backtrace_hook; /* Logically a prof_dump_hook_t. 
*/ -atomic_p_t prof_dump_hook; +static atomic_p_t prof_dump_hook; /* Logically a prof_sample_hook_t. */ -atomic_p_t prof_sample_hook; +static atomic_p_t prof_sample_hook; /* Logically a prof_sample_free_hook_t. */ -atomic_p_t prof_sample_free_hook; +static atomic_p_t prof_sample_free_hook; /******************************************************************************/ diff --git a/src/prof_log.c b/src/prof_log.c index 384d5e38..f4000aec 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -25,7 +25,7 @@ enum prof_logging_state_e { * - started: log_start called, log_stop not called yet. Allocations are logged. * - dumping: log_stop called but not finished; samples are not logged anymore. */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; +static prof_logging_state_t prof_logging_state = prof_logging_state_stopped; /* Used in unit tests. */ static bool prof_log_dummy = false; diff --git a/src/prof_sys.c b/src/prof_sys.c index 467394a5..1f8ecb62 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -27,8 +27,6 @@ malloc_mutex_t prof_dump_filename_mtx; -bool prof_do_mock = false; - static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; diff --git a/src/stats.c b/src/stats.c index 59db4f8e..d80af226 100644 --- a/src/stats.c +++ b/src/stats.c @@ -9,13 +9,13 @@ #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/prof_stats.h" -const char *global_mutex_names[mutex_prof_num_global_mutexes] = { +static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, MUTEX_PROF_GLOBAL_MUTEXES #undef OP }; -const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { +static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { #define OP(mtx) #mtx, MUTEX_PROF_ARENA_MUTEXES #undef OP diff --git a/test/unit/hpa.c b/test/unit/hpa.c index f7874281..64aef59e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,7 @@ static hpa_shard_opts_t 
Which causes all feature checks to fail, since this triggers `-Wstrict-prototypes` and the feature checks use `-Werror`.
JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-Wno-missing-attributes]) - JE_CFLAGS_ADD([-Wstrict-prototypes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index db870689..68ceae55 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -13,7 +13,7 @@ compile_time_malloc_conf='background_thread:true,'\ 'prof_unbias:false,'\ 'prof_time_resolution:high' -./autogen.sh \ +EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autogen.sh \ --with-private-namespace=jemalloc_ \ --disable-cache-oblivious \ --enable-prof \ From 1d9e9c2ed6f0cb3bf168c0d602ae0a289ee27093 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jul 2023 16:27:56 -0700 Subject: [PATCH 098/395] Fix inconsistent parameter names between definition/declaration pairs For the sake of consistency, function definitions and their corresponding declarations should use the same names for parameters. I've enabled this check in static analysis to prevent this issue from occurring again in the future. 
--- include/jemalloc/internal/arena_externs.h | 4 ++-- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/hook.h | 2 +- include/jemalloc/internal/hpdata.h | 2 +- include/jemalloc/internal/tcache_externs.h | 14 +++++++------- include/jemalloc/internal/tsd.h | 2 +- scripts/run_static_analysis.sh | 15 ++++++++++++--- 7 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8e323639..22d7fff7 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -47,9 +47,9 @@ edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldsize); + edata_t *edata, size_t oldusize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldsize); + edata_t *edata, size_t oldusize); bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms); ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 63f97395..027af088 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -48,7 +48,7 @@ struct bins_s { bin_t *bin_shards; }; -void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); +void bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]); bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 41157207..27f94841 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -146,7 +146,7 @@ struct hook_ralloc_args_s { */ bool hook_boot(void); -void *hook_install(tsdn_t *tsdn, hooks_t 
*hooks); +void *hook_install(tsdn_t *tsdn, hooks_t *to_install); /* Uninstalls the hook with the handle previously returned from hook_install. */ void hook_remove(tsdn_t *tsdn, void *opaque); diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 5bf7aae8..36918258 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -359,7 +359,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * offset within that allocation. */ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); -void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); /* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index a2ab7101..37f61646 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -36,14 +36,14 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, bool *tcache_success); + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin, - szind_t binind, bool is_small); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool is_small); 
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3dd52247..c2f432e8 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -164,7 +164,7 @@ void malloc_tsd_dalloc(void *wrapper); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); void tsd_state_set(tsd_t *tsd, uint8_t new_state); void tsd_slow_update(tsd_t *tsd); void tsd_prefork(tsd_t *tsd); diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 68ceae55..4994fe64 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -27,7 +27,7 @@ EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autoge # otherwise you'll get tons of warnings for things # that are already covered by `assert`s. -bear -- make -s -j $(nproc) +bear -- make -s -j "$(nproc)" # We end up with lots of duplicate entries in the compilation database, one for # each output file type (e.g. .o, .d, .sym, etc.). 
There must be exactly one # entry for each file in the compilation database in order for @@ -35,9 +35,18 @@ bear -- make -s -j $(nproc) jq '[.[] | select(.output | test("/[^./]*\\.o$"))]' compile_commands.json > compile_commands.json.tmp mv compile_commands.json.tmp compile_commands.json -CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs $(nproc) \ +# CodeChecker has a bug where it freaks out if you supply the skipfile via process substitution, +# so we resort to manually creating a temporary file +skipfile=$(mktemp) +# The single-quotes are deliberate here, you want `$skipfile` to be evaluated upon exit +trap 'rm -f $skipfile' EXIT +echo '-**/stdlib.h' > "$skipfile" +CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(nproc)" \ --ctu --compile-uniqueing strict --output static_analysis_raw_results \ - --analyzers clang-tidy clangsa + --analyzers clangsa clang-tidy --skip "$skipfile" \ + --enable readability-inconsistent-declaration-parameter-name + # `--enable` is additive, the vast majority of the checks we want are + # enabled by default. html_output_dir="${1:-static_analysis_results}" result=${2:-/dev/null} From 65d3b5989b0afa44f0703bc1ca81f2ba74ed90a5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 8 Jun 2023 12:56:16 -0700 Subject: [PATCH 099/395] Print test error messages in color when stderr is a terminal When stderr is a terminal and supports color, print error messages from tests in red to make them stand out from the surrounding output. --- test/test.sh.in | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test.sh.in b/test/test.sh.in index 39302fff..b4fbb355 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -63,8 +63,14 @@ for t in $@; do fail_count=$((fail_count+1)) ;; *) - echo "Test harness error: ${t} w/ MALLOC_CONF=\"${MALLOC_CONF}\"" 1>&2 - echo "Use prefix to debug, e.g. 
JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh ${t}" 1>&2 + color_start='' + color_end='' + if [ -t 2 ] && tput colors >/dev/null 2>&1; then + color_start='\033[31m' + color_end='\033[0m' + fi + printf "${color_start}Test harness error: %s w/ MALLOC_CONF=\"%s\"${color_end}\n" "${t}" "${MALLOC_CONF}" 1>&2 + printf "${color_start}Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh %s${color_end}\n" "${t}" 1>&2 exit 1 esac done From 314c073a38adfbfc97ed2913e287e8e642fc46ca Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 10 Jul 2023 14:25:53 -0700 Subject: [PATCH 100/395] Print the failed assertion before aborting in test cases This makes it faster and easier to debug, so that you don't need to fire up a debugger just to see which assertion triggered in a failing test. --- test/include/test/test.h | 38 +++++++------------------------------- test/src/test.c | 5 ++++- 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 183dfc00..80ca7cbb 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -13,11 +13,7 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -230,11 +226,7 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -251,11 +243,7 @@ #a, #b, a_ ? "true" : "false", \ b_ ? 
"true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -275,11 +263,7 @@ "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -293,11 +277,7 @@ "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -311,11 +291,7 @@ "%s:%s:%d: Unreachable code reached: ", \ __func__, __FILE__, __LINE__); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } while (0) #define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) @@ -580,4 +556,4 @@ test_status_t p_test_no_reentrancy(test_t *t, ...); test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); -void p_test_fail(const char *prefix, const char *message); +void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index e26dbfd4..8b69d74a 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -228,7 +228,10 @@ p_test_no_malloc_init(test_t *t, ...) 
{ } void -p_test_fail(const char *prefix, const char *message) { +p_test_fail(bool may_abort, const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; + if (may_abort) { + abort(); + } } From 36ca0c1b7de5fc92e6be48f73f28a6dce0e8890e Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 12 Jul 2023 09:57:46 -0700 Subject: [PATCH 101/395] Stop concealing pointer provenance in `phn_link_get` At least for LLVM, [casting from an integer to a pointer hides provenance information](https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html) and inhibits optimizations. Here's a [Godbolt link](https://godbolt.org/z/5bYPcKoWT) showing how this change removes a couple unnecessary branches in `phn_merge_siblings`, which is a very hot function. Canary profiles show only minor improvements (since most of the cost of this function is in cache misses), but there's no reason we shouldn't take it. --- include/jemalloc/internal/ph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 1fabee5d..89de8663 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -73,7 +73,7 @@ struct ph_s { JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { - return (phn_link_t *)(((uintptr_t)phn) + offset); + return (phn_link_t *)(((char *)phn) + offset); } JEMALLOC_ALWAYS_INLINE void From 856db56f6ec54f59491fa7897dab9a23d5bf9ff4 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 12 Jun 2023 16:05:18 -0700 Subject: [PATCH 102/395] Move tsd implementation details into `tsd_internals.h` This is a prerequisite to achieving self-contained headers. 
Previously, the various tsd implementation headers (`tsd_generic.h`, `tsd_tls.h`, `tsd_malloc_thread_cleanup.h`, and `tsd_win.h`) relied implicitly on being included in `tsd.h` after a variety of dependencies had been defined above them. This commit instead makes these dependencies explicit by splitting them out into a separate file, `tsd_internals.h`, which each of the tsd implementation headers includes directly. --- include/jemalloc/internal/tsd.h | 298 ----------------- include/jemalloc/internal/tsd_generic.h | 2 + include/jemalloc/internal/tsd_internals.h | 303 ++++++++++++++++++ .../internal/tsd_malloc_thread_cleanup.h | 2 + include/jemalloc/internal/tsd_tls.h | 2 + include/jemalloc/internal/tsd_win.h | 2 + 6 files changed, 311 insertions(+), 298 deletions(-) create mode 100644 include/jemalloc/internal/tsd_internals.h diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c2f432e8..e36ffc6d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -1,304 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TSD_H #define JEMALLOC_INTERNAL_TSD_H -#include "jemalloc/internal/activity_callback.h" -#include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/bin_types.h" -#include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/peak.h" -#include "jemalloc/internal/prof_types.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/witness.h" - -/* - * Thread-Specific-Data layout - * - * At least some thread-local data gets touched on the fast-path of almost all - * malloc operations. But much of it is only necessary down slow-paths, or - * testing. We want to colocate the fast-path data so that it can live on the - * same cacheline if possible. 
So we define three tiers of hotness: - * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. - * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; - * there are "semi-slow" paths like "not a sized deallocation, but can still - * live in the tcache". We'll want to keep these closer to the fast-path - * data. - * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. - * - * An additional concern is that the larger tcache bins won't be used (we have a - * bin per size class, but by default only cache relatively small objects). So - * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the - * TSD_DATA_SLOWER tier. - * - * As a result of all this, we put the slow data first, then the fast data, then - * the slower data, while keeping the tcache as the last element of the fast - * data (so that the fast -> slower transition happens midway through the - * tcache). While we don't yet play alignment tricks to guarantee it, this - * increases our odds of getting some cache/page locality on fast paths. 
- */ - -#ifdef JEMALLOC_JET -typedef void (*test_callback_t)(int *); -# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 -# define MALLOC_TEST_TSD \ - O(test_data, int, int) \ - O(test_callback, test_callback_t, int) -# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL -#else -# define MALLOC_TEST_TSD -# define MALLOC_TEST_TSD_INITIALIZER -#endif - -typedef ql_elm(tsd_t) tsd_link_t; - -/* O(name, type, nullable type) */ -#define TSD_DATA_SLOW \ - O(tcache_enabled, bool, bool) \ - O(reentrancy_level, int8_t, int8_t) \ - O(min_init_state_nfetched, uint8_t, uint8_t) \ - O(thread_allocated_last_event, uint64_t, uint64_t) \ - O(thread_allocated_next_event, uint64_t, uint64_t) \ - O(thread_deallocated_last_event, uint64_t, uint64_t) \ - O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(tcache_gc_event_wait, uint64_t, uint64_t) \ - O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_sample_event_wait, uint64_t, uint64_t) \ - O(prof_sample_last_event, uint64_t, uint64_t) \ - O(stats_interval_event_wait, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(peak_alloc_event_wait, uint64_t, uint64_t) \ - O(peak_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(prng_state, uint64_t, uint64_t) \ - O(san_extents_until_guard_small, uint64_t, uint64_t) \ - O(san_extents_until_guard_large, uint64_t, uint64_t) \ - O(iarena, arena_t *, arena_t *) \ - O(arena, arena_t *, arena_t *) \ - O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ - O(sec_shard, uint8_t, uint8_t) \ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) \ - O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, activity_callback_thunk_t, \ - activity_callback_thunk_t) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) - -#define TSD_DATA_SLOW_INITIALIZER \ - /* tcache_enabled */ 
TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* reentrancy_level */ 0, \ - /* min_init_state_nfetched */ 0, \ - /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ 0, \ - /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ 0, \ - /* tcache_gc_event_wait */ 0, \ - /* tcache_gc_dalloc_event_wait */ 0, \ - /* prof_sample_event_wait */ 0, \ - /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ 0, \ - /* stats_interval_last_event */ 0, \ - /* peak_alloc_event_wait */ 0, \ - /* peak_dalloc_event_wait */ 0, \ - /* prof_tdata */ NULL, \ - /* prng_state */ 0, \ - /* san_extents_until_guard_small */ 0, \ - /* san_extents_until_guard_large */ 0, \ - /* iarena */ NULL, \ - /* arena */ NULL, \ - /* arena_decay_ticker */ \ - TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ - /* sec_shard */ (uint8_t)-1, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tsd_link */ {NULL}, \ - /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, \ - /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_INITIALIZER, - -/* O(name, type, nullable type) */ -#define TSD_DATA_FAST \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(tcache, tcache_t, tcache_t) - -#define TSD_DATA_FAST_INITIALIZER \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, - -/* O(name, type, nullable type) */ -#define TSD_DATA_SLOWER \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD - -#define TSD_DATA_SLOWER_INITIALIZER \ - /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER - - -#define 
TSD_INITIALIZER { \ - TSD_DATA_SLOW_INITIALIZER \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ - TSD_DATA_FAST_INITIALIZER \ - TSD_DATA_SLOWER_INITIALIZER \ -} - -#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -void _malloc_tsd_cleanup_register(bool (*f)(void)); -#endif - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); -void tsd_state_set(tsd_t *tsd, uint8_t new_state); -void tsd_slow_update(tsd_t *tsd); -void tsd_prefork(tsd_t *tsd); -void tsd_postfork_parent(tsd_t *tsd); -void tsd_postfork_child(tsd_t *tsd); - -/* - * Call ..._inc when your module wants to take all threads down the slow paths, - * and ..._dec when it no longer needs to. - */ -void tsd_global_slow_inc(tsdn_t *tsdn); -void tsd_global_slow_dec(tsdn_t *tsdn); -bool tsd_global_slow(void); - -#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) - -enum { - /* Common case --> jnz. */ - tsd_state_nominal = 0, - /* Initialized but on slow path. */ - tsd_state_nominal_slow = 1, - /* - * Some thread has changed global state in such a way that all nominal - * threads need to recompute their fast / slow status the next time they - * get a chance. - * - * Any thread can change another thread's status *to* recompute, but - * threads are the only ones who can change their status *from* - * recompute. - */ - tsd_state_nominal_recompute = 2, - /* - * The above nominal states should be lower values. We use - * tsd_nominal_max to separate nominal states from threads in the - * process of being born / dying. - */ - tsd_state_nominal_max = 2, - - /* - * A thread might free() during its death as its only allocator action; - * in such scenarios, we need tsd, but set up in such a way that no - * cleanup is necessary. - */ - tsd_state_minimal_initialized = 3, - /* States during which we know we're in thread death. 
*/ - tsd_state_purgatory = 4, - tsd_state_reincarnated = 5, - /* - * What it says on the tin; tsd that hasn't been initialized. Note - * that even when the tsd struct lives in TLS, when need to keep track - * of stuff like whether or not our pthread destructors have been - * scheduled, so this really truly is different than the nominal state. - */ - tsd_state_uninitialized = 6 -}; - -/* - * Some TSD accesses can only be done in a nominal state. To enforce this, we - * wrap TSD member access in a function that asserts on TSD state, and mangle - * field names to prevent touching them accidentally. - */ -#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n - -#ifdef JEMALLOC_U8_ATOMICS -# define tsd_state_t atomic_u8_t -# define tsd_atomic_load atomic_load_u8 -# define tsd_atomic_store atomic_store_u8 -# define tsd_atomic_exchange atomic_exchange_u8 -#else -# define tsd_state_t atomic_u32_t -# define tsd_atomic_load atomic_load_u32 -# define tsd_atomic_store atomic_store_u32 -# define tsd_atomic_exchange atomic_exchange_u32 -#endif - -/* The actual tsd. */ -struct tsd_s { - /* - * The contents should be treated as totally opaque outside the tsd - * module. Access any thread-local state through the getters and - * setters below. - */ - -#define O(n, t, nt) \ - t TSD_MANGLE(n); - - TSD_DATA_SLOW - /* - * We manually limit the state to just a single byte. Unless the 8-bit - * atomics are unavailable (which is rare). - */ - tsd_state_t state; - TSD_DATA_FAST - TSD_DATA_SLOWER -#undef O -}; - -JEMALLOC_ALWAYS_INLINE uint8_t -tsd_state_get(tsd_t *tsd) { - /* - * This should be atomic. Unfortunately, compilers right now can't tell - * that this can be done as a memory comparison, and forces a load into - * a register that hurts fast-path performance. 
- */ - /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ - return *(uint8_t *)&tsd->state; -} - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; -#define TSDN_NULL ((tsdn_t *)0) -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) { - return (tsdn_t *)tsd; -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) { - return tsdn == NULL; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); - - return &tsdn->tsd; -} - /* * We put the platform-specific data declarations and inlines into their own * header files to avoid cluttering this file. They define tsd_boot0, diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index a718472f..c5648f63 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H +#include "jemalloc/internal/tsd_internals.h" + typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { ql_elm(tsd_init_block_t) link; diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h new file mode 100644 index 00000000..813580c0 --- /dev/null +++ b/include/jemalloc/internal/tsd_internals.h @@ -0,0 +1,303 @@ +#ifdef JEMALLOC_INTERNAL_TSD_INTERNALS_H +#error This file should be included only once, by one of tsd_malloc_thread_cleanup.h, tsd_tls.h, tsd_generic.h, or tsd_win.h +#endif +#define JEMALLOC_INTERNAL_TSD_INTERNALS_H + +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/peak.h" +#include 
"jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/tsd_types.h" + +/* + * Thread-Specific-Data layout + * + * At least some thread-local data gets touched on the fast-path of almost all + * malloc operations. But much of it is only necessary down slow-paths, or + * testing. We want to colocate the fast-path data so that it can live on the + * same cacheline if possible. So we define three tiers of hotness: + * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. + * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; + * there are "semi-slow" paths like "not a sized deallocation, but can still + * live in the tcache". We'll want to keep these closer to the fast-path + * data. + * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. + * + * An additional concern is that the larger tcache bins won't be used (we have a + * bin per size class, but by default only cache relatively small objects). So + * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the + * TSD_DATA_SLOWER tier. + * + * As a result of all this, we put the slow data first, then the fast data, then + * the slower data, while keeping the tcache as the last element of the fast + * data (so that the fast -> slower transition happens midway through the + * tcache). While we don't yet play alignment tricks to guarantee it, this + * increases our odds of getting some cache/page locality on fast paths. 
+ */ + +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +#else +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER +#endif + +typedef ql_elm(tsd_t) tsd_link_t; + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOW \ + O(tcache_enabled, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ + O(tcache_gc_event_wait, uint64_t, uint64_t) \ + O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ + O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_event_wait, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ + O(peak_alloc_event_wait, uint64_t, uint64_t) \ + O(peak_dalloc_event_wait, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ + O(sec_shard, uint8_t, uint8_t) \ + O(binshards, tsd_binshards_t, tsd_binshards_t)\ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) + +#define TSD_DATA_SLOW_INITIALIZER \ + /* tcache_enabled */ 
TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* reentrancy_level */ 0, \ + /* min_init_state_nfetched */ 0, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ 0, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ 0, \ + /* tcache_gc_event_wait */ 0, \ + /* tcache_gc_dalloc_event_wait */ 0, \ + /* prof_sample_event_wait */ 0, \ + /* prof_sample_last_event */ 0, \ + /* stats_interval_event_wait */ 0, \ + /* stats_interval_last_event */ 0, \ + /* peak_alloc_event_wait */ 0, \ + /* peak_dalloc_event_wait */ 0, \ + /* prof_tdata */ NULL, \ + /* prng_state */ 0, \ + /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, \ + /* iarena */ NULL, \ + /* arena */ NULL, \ + /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ + /* sec_shard */ (uint8_t)-1, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, \ + /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, \ + /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(tcache, tcache_t, tcache_t) + +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD + +#define TSD_DATA_SLOWER_INITIALIZER \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER + + +#define 
TSD_INITIALIZER { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER \ + TSD_DATA_SLOWER_INITIALIZER \ +} + +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +void _malloc_tsd_cleanup_register(bool (*f)(void)); +#endif + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); +void tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); + +/* + * Call ..._inc when your module wants to take all threads down the slow paths, + * and ..._dec when it no longer needs to. + */ +void tsd_global_slow_inc(tsdn_t *tsdn); +void tsd_global_slow_dec(tsdn_t *tsdn); +bool tsd_global_slow(void); + +#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) + +enum { + /* Common case --> jnz. */ + tsd_state_nominal = 0, + /* Initialized but on slow path. */ + tsd_state_nominal_slow = 1, + /* + * Some thread has changed global state in such a way that all nominal + * threads need to recompute their fast / slow status the next time they + * get a chance. + * + * Any thread can change another thread's status *to* recompute, but + * threads are the only ones who can change their status *from* + * recompute. + */ + tsd_state_nominal_recompute = 2, + /* + * The above nominal states should be lower values. We use + * tsd_nominal_max to separate nominal states from threads in the + * process of being born / dying. + */ + tsd_state_nominal_max = 2, + + /* + * A thread might free() during its death as its only allocator action; + * in such scenarios, we need tsd, but set up in such a way that no + * cleanup is necessary. + */ + tsd_state_minimal_initialized = 3, + /* States during which we know we're in thread death. 
*/ + tsd_state_purgatory = 4, + tsd_state_reincarnated = 5, + /* + * What it says on the tin; tsd that hasn't been initialized. Note + * that even when the tsd struct lives in TLS, when need to keep track + * of stuff like whether or not our pthread destructors have been + * scheduled, so this really truly is different than the nominal state. + */ + tsd_state_uninitialized = 6 +}; + +/* + * Some TSD accesses can only be done in a nominal state. To enforce this, we + * wrap TSD member access in a function that asserts on TSD state, and mangle + * field names to prevent touching them accidentally. + */ +#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n + +#ifdef JEMALLOC_U8_ATOMICS +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 +#else +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 +#endif + +/* The actual tsd. */ +struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. + */ + +#define O(n, t, nt) \ + t TSD_MANGLE(n); + + TSD_DATA_SLOW + /* + * We manually limit the state to just a single byte. Unless the 8-bit + * atomics are unavailable (which is rare). + */ + tsd_state_t state; + TSD_DATA_FAST + TSD_DATA_SLOWER +#undef O +}; + +JEMALLOC_ALWAYS_INLINE uint8_t +tsd_state_get(tsd_t *tsd) { + /* + * This should be atomic. Unfortunately, compilers right now can't tell + * that this can be done as a memory comparison, and forces a load into + * a register that hurts fast-path performance. 
+ */ + /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ + return *(uint8_t *)&tsd->state; +} + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; +#define TSDN_NULL ((tsdn_t *)0) +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + return &tsdn->tsd; +} diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index d8f3ef13..a6bd3f58 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#include "jemalloc/internal/tsd_internals.h" + #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 7d6c805b..cd0ddbd1 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H +#include "jemalloc/internal/tsd_internals.h" + #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index a91dac88..4efeffb6 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_WIN_H +#include "jemalloc/internal/tsd_internals.h" + typedef struct { bool initialized; tsd_t val; From 
41e0b857bef0b787a581c7a8334b46981d5e06ed Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 9 Jun 2023 17:37:47 -0700 Subject: [PATCH 103/395] Make headers self-contained by fixing `#include`s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Header files are now self-contained, which makes the relationships between the files clearer, and crucially allows LSP tools like `clangd` to function correctly in all of our header files. I have verified that the headers are self-contained (aside from the various Windows shims) by compiling them as if they were C files – in a follow-up commit I plan to add this to CI to ensure we don't regress on this front. --- include/jemalloc/internal/activity_callback.h | 2 ++ include/jemalloc/internal/arena_externs.h | 3 +++ include/jemalloc/internal/arena_inlines_a.h | 3 +++ include/jemalloc/internal/arena_inlines_b.h | 8 ++++++++ include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/arena_structs.h | 1 + include/jemalloc/internal/arena_types.h | 1 + include/jemalloc/internal/assert.h | 1 + include/jemalloc/internal/atomic.h | 4 +++- include/jemalloc/internal/atomic_c11.h | 1 + include/jemalloc/internal/atomic_gcc_atomic.h | 5 +++++ include/jemalloc/internal/atomic_gcc_sync.h | 6 ++++++ include/jemalloc/internal/atomic_msvc.h | 6 ++++++ include/jemalloc/internal/background_thread_externs.h | 5 +++++ include/jemalloc/internal/background_thread_inlines.h | 5 +++++ include/jemalloc/internal/background_thread_structs.h | 3 +++ include/jemalloc/internal/base.h | 1 + include/jemalloc/internal/bin.h | 1 + include/jemalloc/internal/bin_info.h | 1 + include/jemalloc/internal/bin_stats.h | 1 + include/jemalloc/internal/bin_types.h | 1 + include/jemalloc/internal/bit_util.h | 1 + include/jemalloc/internal/bitmap.h | 1 + include/jemalloc/internal/buf_writer.h | 4 ++++ include/jemalloc/internal/cache_bin.h | 2 ++ include/jemalloc/internal/ckh.h | 1 + 
include/jemalloc/internal/counter.h | 2 ++ include/jemalloc/internal/ctl.h | 4 ++++ include/jemalloc/internal/decay.h | 2 ++ include/jemalloc/internal/div.h | 1 + include/jemalloc/internal/ecache.h | 3 ++- include/jemalloc/internal/edata.h | 2 ++ include/jemalloc/internal/edata_cache.h | 1 + include/jemalloc/internal/ehooks.h | 3 +++ include/jemalloc/internal/emap.h | 1 + include/jemalloc/internal/emitter.h | 4 ++++ include/jemalloc/internal/eset.h | 3 ++- include/jemalloc/internal/exp_grow.h | 2 ++ include/jemalloc/internal/extent.h | 2 ++ include/jemalloc/internal/extent_dss.h | 4 ++++ include/jemalloc/internal/extent_mmap.h | 2 ++ include/jemalloc/internal/fb.h | 4 ++++ include/jemalloc/internal/fxp.h | 3 +++ include/jemalloc/internal/hash.h | 1 + include/jemalloc/internal/hook.h | 1 + include/jemalloc/internal/hpa.h | 5 +++++ include/jemalloc/internal/hpa_hooks.h | 3 +++ include/jemalloc/internal/hpa_opts.h | 1 + include/jemalloc/internal/hpdata.h | 3 +++ include/jemalloc/internal/inspect.h | 3 +++ include/jemalloc/internal/jemalloc_internal_externs.h | 3 ++- include/jemalloc/internal/jemalloc_internal_inlines_a.h | 4 ++++ include/jemalloc/internal/jemalloc_internal_inlines_b.h | 3 +++ include/jemalloc/internal/jemalloc_internal_inlines_c.h | 6 ++++-- include/jemalloc/internal/jemalloc_preamble.h.in | 2 +- include/jemalloc/internal/large_externs.h | 2 ++ include/jemalloc/internal/lockedint.h | 5 +++++ include/jemalloc/internal/log.h | 1 + include/jemalloc/internal/malloc_io.h | 1 + include/jemalloc/internal/mpsc_queue.h | 1 + include/jemalloc/internal/mutex.h | 1 + include/jemalloc/internal/mutex_prof.h | 1 + include/jemalloc/internal/nstime.h | 3 +++ include/jemalloc/internal/pa.h | 1 + include/jemalloc/internal/pac.h | 6 +++++- include/jemalloc/internal/pages.h | 2 ++ include/jemalloc/internal/pai.h | 4 ++++ include/jemalloc/internal/peak.h | 2 ++ include/jemalloc/internal/peak_event.h | 3 +++ include/jemalloc/internal/ph.h | 4 ++++ 
include/jemalloc/internal/prng.h | 1 + include/jemalloc/internal/prof_data.h | 1 + include/jemalloc/internal/prof_externs.h | 2 ++ include/jemalloc/internal/prof_hook.h | 2 ++ include/jemalloc/internal/prof_inlines.h | 6 +++++- include/jemalloc/internal/prof_log.h | 1 + include/jemalloc/internal/prof_recent.h | 4 ++++ include/jemalloc/internal/prof_stats.h | 3 +++ include/jemalloc/internal/prof_structs.h | 1 + include/jemalloc/internal/prof_sys.h | 4 ++++ include/jemalloc/internal/psset.h | 1 + include/jemalloc/internal/ql.h | 1 + include/jemalloc/internal/rb.h | 3 +++ include/jemalloc/internal/rtree.h | 3 +++ include/jemalloc/internal/rtree_tsd.h | 2 ++ include/jemalloc/internal/safety_check.h | 6 ++++-- include/jemalloc/internal/san.h | 3 +++ include/jemalloc/internal/san_bump.h | 2 ++ include/jemalloc/internal/sc.h | 1 + include/jemalloc/internal/sec.h | 4 ++++ include/jemalloc/internal/sec_opts.h | 2 ++ include/jemalloc/internal/seq.h | 1 + include/jemalloc/internal/slab_data.h | 1 + include/jemalloc/internal/spin.h | 2 ++ include/jemalloc/internal/stats.h | 4 ++++ include/jemalloc/internal/sz.h | 1 + include/jemalloc/internal/tcache_externs.h | 6 ++++++ include/jemalloc/internal/tcache_inlines.h | 5 +++++ include/jemalloc/internal/tcache_structs.h | 3 ++- include/jemalloc/internal/tcache_types.h | 1 + include/jemalloc/internal/test_hooks.h | 2 ++ include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/ticker.h | 1 + include/jemalloc/internal/tsd.h | 1 + include/jemalloc/internal/tsd_generic.h | 3 +++ include/jemalloc/internal/tsd_internals.h | 5 +++-- include/jemalloc/internal/tsd_malloc_thread_cleanup.h | 2 ++ include/jemalloc/internal/tsd_tls.h | 2 ++ include/jemalloc/internal/tsd_types.h | 2 ++ include/jemalloc/internal/tsd_win.h | 2 ++ include/jemalloc/internal/util.h | 3 +++ include/jemalloc/internal/witness.h | 2 ++ include/jemalloc/jemalloc_protos.h.in | 3 +++ test/include/test/jemalloc_test.h.in | 9 +-------- 
test/integration/MALLOCX_ARENA.c | 8 -------- test/integration/allocated.c | 8 -------- 116 files changed, 277 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h index 6c2e84e3..0f4f3962 100644 --- a/include/jemalloc/internal/activity_callback.h +++ b/include/jemalloc/internal/activity_callback.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H #define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The callback to be executed "periodically", in response to some amount of * allocator activity. diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 22d7fff7..d79b607a 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/div.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 8568358c..214ce80b 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H #define JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_structs.h" + static inline unsigned arena_ind_get(const arena_t *arena) { return arena->ind; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 420a62b2..c7d08227 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ 
b/include/jemalloc/internal/arena_inlines_b.h @@ -1,14 +1,22 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/ticker.h" static inline arena_t * diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 15f1d345..3407b023 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STATS_H #define JEMALLOC_INTERNAL_ARENA_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e6868fce..0fffa7eb 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 
45eec69f..a1fc8926 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" /* Default decay times in milliseconds. */ diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index be4d45b3..38eb2a2c 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -1,3 +1,4 @@ +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index c0f73122..6dd2a7c6 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_H -#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE +#include "jemalloc/internal/jemalloc_preamble.h" #define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) @@ -22,6 +22,8 @@ # error "Don't have atomics implemented on this platform." #endif +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + /* * This header gives more or less a backport of C11 atomics. The user can write * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index a5f9313a..74173b03 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H #define JEMALLOC_INTERNAL_ATOMIC_C11_H +#include "jemalloc/internal/jemalloc_preamble.h" #include #define ATOMIC_INIT(...) 
ATOMIC_VAR_INIT(__VA_ARGS__) diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 471515e8..0819fde1 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) {__VA_ARGS__} typedef enum { @@ -126,4 +129,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ atomic_enum_to_builtin(mo)); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index e02b7cbe..21136bd0 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#include "jemalloc/internal/jemalloc_preamble.h" + +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) {__VA_ARGS__} typedef enum { @@ -192,4 +196,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ return __sync_fetch_and_xor(&a->repr, val); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index 67057ce5..a429f1ab 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H #define JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#include "jemalloc/internal/jemalloc_preamble.h" + +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) 
{__VA_ARGS__} typedef enum { @@ -155,4 +159,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 6ae3c8d8..0d34ee55 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/mutex.h" + extern bool opt_background_thread; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 92c5febe..4ed05d1b 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread_externs.h" + JEMALLOC_ALWAYS_INLINE bool background_thread_enabled(void) { return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 83a91984..67b68797 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -1,6 +1,9 @@ #ifndef 
JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" + /* This file really combines "structs" and "types", but only transitionally. */ #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 23207563..6b41aa6f 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_H #define JEMALLOC_INTERNAL_BASE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 027af088..ed27c18f 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_H #define JEMALLOC_INTERNAL_BIN_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 7fe65c86..b6175550 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_INFO_H #define JEMALLOC_INTERNAL_BIN_INFO_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bitmap.h" /* diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index 0b99297c..f95b9e9c 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_STATS_H #define JEMALLOC_INTERNAL_BIN_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex_prof.h" 
typedef struct bin_stats_s bin_stats_t; diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 945e8326..5ec22dfd 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_TYPES_H #define JEMALLOC_INTERNAL_BIN_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" #define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 70fa4bc9..c413a75d 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIT_UTIL_H #define JEMALLOC_INTERNAL_BIT_UTIL_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* Sanity check. */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index dc19454d..e501da47 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_H #define JEMALLOC_INTERNAL_BITMAP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 37aa6de5..fa0ac99c 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_BUF_WRITER_H #define JEMALLOC_INTERNAL_BUF_WRITER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/tsd_types.h" + /* * Note: when using the buffered writer, cbopaque is passed to write_cb only * when the buffer is flushed. 
It would make a difference if cbopaque points diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c9c8f865..218e368e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H #define JEMALLOC_INTERNAL_CACHE_BIN_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 7b3850bc..8e9d7fed 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_CKH_H #define JEMALLOC_INTERNAL_CKH_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* Cuckoo hashing implementation. Skip to the end for the interface. */ diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 79abf064..74e30701 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_COUNTER_H #define JEMALLOC_INTERNAL_COUNTER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" typedef struct counter_accum_s { diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 63d27f8a..1d3e6140 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_CTL_H #define JEMALLOC_INTERNAL_CTL_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex_prof.h" diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index cf6a9d22..74be55da 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_DECAY_H #define JEMALLOC_INTERNAL_DECAY_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/smoothstep.h" #define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h index aebae939..56d5f463 100644 --- a/include/jemalloc/internal/div.h +++ b/include/jemalloc/internal/div.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_DIV_H #define JEMALLOC_INTERNAL_DIV_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 71cae3e3..2bd74fde 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -1,9 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ECACHE_H #define JEMALLOC_INTERNAL_ECACHE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/eset.h" -#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/san.h" typedef struct ecache_s ecache_t; struct ecache_s { diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index d2d16c46..5fe4e14d 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -1,12 +1,14 @@ #ifndef JEMALLOC_INTERNAL_EDATA_H #define JEMALLOC_INTERNAL_EDATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" +#include 
"jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 8b6c0ef7..b2c7b4f1 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H #define JEMALLOC_INTERNAL_EDATA_CACHE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" /* For tests only. */ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index d583c521..947e056c 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/tsd_types.h" /* * This module is the internal interface to the extent hooks (both diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 847af327..08262f1f 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EMAP_H #define JEMALLOC_INTERNAL_EMAP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 9482f68b..bc12fe92 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EMITTER_H #define JEMALLOC_INTERNAL_EMITTER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/ql.h" typedef enum emitter_output_e emitter_output_t; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 4f689b47..9b7c4a89 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -1,9 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ESET_H #define JEMALLOC_INTERNAL_ESET_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/fb.h" #include "jemalloc/internal/edata.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/mutex.h" /* diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h index 8566b8a4..40a1add0 100644 --- a/include/jemalloc/internal/exp_grow.h +++ b/include/jemalloc/internal/exp_grow.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXP_GROW_H #define JEMALLOC_INTERNAL_EXP_GROW_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/sz.h" typedef struct exp_grow_s exp_grow_t; struct exp_grow_s { /* diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 367793db..17feb703 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -1,8 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_H #define JEMALLOC_INTERNAL_EXTENT_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/pac.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index 38f04340..c8e71e82 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H #define JEMALLOC_INTERNAL_EXTENT_DSS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_types.h" +#include 
"jemalloc/internal/tsd_types.h" + typedef enum { dss_prec_disabled = 0, dss_prec_primary = 1, diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h index 55f17ee4..e6a4649e 100644 --- a/include/jemalloc/internal/extent_mmap.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" + extern bool opt_retain; void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h index 90c4091f..e38095af 100644 --- a/include/jemalloc/internal/fb.h +++ b/include/jemalloc/internal/fb.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_FB_H #define JEMALLOC_INTERNAL_FB_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" + /* * The flat bitmap module. This has a larger API relative to the bitmap module * (supporting things like backwards searches, and searching for both set and diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index 415a9828..e42425f9 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_FXP_H #define JEMALLOC_INTERNAL_FXP_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" + /* * A simple fixed-point math implementation, supporting only unsigned values * (with overflow being an error). 
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 7f945679..15162b94 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HASH_H #define JEMALLOC_INTERNAL_HASH_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 27f94841..76b9130d 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HOOK_H #define JEMALLOC_INTERNAL_HOOK_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 01fe3166..4805efaf 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -1,9 +1,14 @@ #ifndef JEMALLOC_INTERNAL_HPA_H #define JEMALLOC_INTERNAL_HPA_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 841f529e..72f3a43c 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H #define JEMALLOC_INTERNAL_HPA_HOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/nstime.h" + typedef struct hpa_hooks_s hpa_hooks_t; struct hpa_hooks_s { void *(*map)(size_t size); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ee84fea1..6e58c86b 100644 --- 
a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPA_OPTS_H #define JEMALLOC_INTERNAL_HPA_OPTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/fxp.h" /* diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 36918258..7ba92112 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_HPDATA_H #define JEMALLOC_INTERNAL_HPDATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/fb.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h index 65fef51d..0da920ca 100644 --- a/include/jemalloc/internal/inspect.h +++ b/include/jemalloc/internal/inspect.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_INSPECT_H #define JEMALLOC_INTERNAL_INSPECT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + /* * This module contains the heap introspection capabilities. 
For now they are * exposed purely through mallctl APIs in the experimental namespace, but this diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b1e5bde9..ae03c644 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -1,11 +1,12 @@ #ifndef JEMALLOC_INTERNAL_EXTERNS_H #define JEMALLOC_INTERNAL_EXTERNS_H +#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sec_opts.h" #include "jemalloc/internal/tsd_types.h" -#include "jemalloc/internal/nstime.h" /* TSD checks this to set thread local slow state accordingly. */ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index cb6d78fa..8d5e22fd 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -1,10 +1,14 @@ #ifndef JEMALLOC_INTERNAL_INLINES_A_H #define JEMALLOC_INTERNAL_INLINES_A_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 152f8a03..b2cab228 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define 
JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index ae9cb0c2..1dac668a 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -1,14 +1,16 @@ #ifndef JEMALLOC_INTERNAL_INLINES_C_H #define JEMALLOC_INTERNAL_INLINES_C_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/emap.h" /* * These correspond to the macros in jemalloc/jemalloc_macros.h. 
Broadly, we diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index d7086302..6b55e47f 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -1,7 +1,7 @@ #ifndef JEMALLOC_PREAMBLE_H #define JEMALLOC_PREAMBLE_H -#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" #if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 8e09122d..ce9c8689 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/hook.h" void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index d020ebec..062dedbf 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_LOCKEDINT_H #define JEMALLOC_INTERNAL_LOCKEDINT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd_types.h" + /* * In those architectures that support 64-bit atomics, we use atomic updates for * our 64-bit values. 
Otherwise, we use a plain uint64_t and synchronize diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index f39c598a..921985c8 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_LOG_H #define JEMALLOC_INTERNAL_LOG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index a375bdae..0afb0429 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MALLOC_IO_H #define JEMALLOC_INTERNAL_MALLOC_IO_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" #ifdef _WIN32 diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h index 316ea9b1..d8aa624b 100644 --- a/include/jemalloc/internal/mpsc_queue.h +++ b/include/jemalloc/internal/mpsc_queue.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H #define JEMALLOC_INTERNAL_MPSC_QUEUE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" /* diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 03d3557b..46f22aec 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_H #define JEMALLOC_INTERNAL_MUTEX_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/tsd.h" diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 4a526a5a..14e4340b 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -1,6 +1,7 @@ #ifndef 
JEMALLOC_INTERNAL_MUTEX_PROF_H #define JEMALLOC_INTERNAL_MUTEX_PROF_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/tsd_types.h" diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index ad1ae532..440a4d15 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_NSTIME_H #define JEMALLOC_INTERNAL_NSTIME_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" + /* Maximum supported number of seconds (~584 years). */ #define NSTIME_SEC_MAX KQU(18446744072) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index c8aed932..5f43244d 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 01c4e6af..0b173a58 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,11 +1,15 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/decay.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pai.h" #include "san_bump.h" - /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. 
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index c9d10ce2..0ecc2cd0 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* Actual operating system page size, detected during bootstrap, <= PAGE. */ extern size_t os_page; diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index d978cd7d..dd64ee59 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PAI_H #define JEMALLOC_INTERNAL_PAI_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/tsd_types.h" + /* An interface for page allocation. */ typedef struct pai_s pai_t; diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h index 59da3e41..2a973cb8 100644 --- a/include/jemalloc/internal/peak.h +++ b/include/jemalloc/internal/peak.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PEAK_H #define JEMALLOC_INTERNAL_PEAK_H +#include "jemalloc/internal/jemalloc_preamble.h" + typedef struct peak_s peak_t; struct peak_s { /* The highest recorded peak value, after adjustment (see below). */ diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index b808ce04..cc2a1401 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H #define JEMALLOC_INTERNAL_PEAK_EVENT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + /* * While peak.h contains the simple helper struct that tracks state, this * contains the allocator tie-ins (and knows about tsd, the event module, etc.). 
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 89de8663..3ae38710 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PH_H #define JEMALLOC_INTERNAL_PH_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" + /* * A Pairing Heap implementation. * diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 14542aa1..81060d32 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PRNG_H #define JEMALLOC_INTERNAL_PRNG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" /* diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 016b6507..43e8d7e7 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_DATA_H #define JEMALLOC_INTERNAL_PROF_DATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index cce5c8f5..514c5804 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_hook.h" diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 8615dc53..3c5ff8bf 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_HOOK_H #define 
JEMALLOC_INTERNAL_PROF_HOOK_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The hooks types of which are declared in this file are experimental and * undocumented, thus the typedefs are located in an 'internal' header. diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index c0783fc1..6cb73735 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -1,10 +1,14 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_H #define JEMALLOC_INTERNAL_PROF_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" -#include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void prof_active_assert(void) { diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index ccb557dd..0b1271c8 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_LOG_H #define JEMALLOC_INTERNAL_PROF_LOG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex.h" extern malloc_mutex_t log_mtx; diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index 959e336b..33649e6d 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_H #define JEMALLOC_INTERNAL_PROF_RECENT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/mutex.h" + extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; diff --git 
a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h index 7954e82d..c4d269e5 100644 --- a/include/jemalloc/internal/prof_stats.h +++ b/include/jemalloc/internal/prof_stats.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PROF_STATS_H #define JEMALLOC_INTERNAL_PROF_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" + typedef struct prof_stats_s prof_stats_t; struct prof_stats_s { uint64_t req_sum; diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 49061f02..084a549d 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H #define JEMALLOC_INTERNAL_PROF_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0eb50788..e6e7f06f 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PROF_SYS_H #define JEMALLOC_INTERNAL_PROF_SYS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/mutex.h" + extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index e1d64970..7e510b7f 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PSSET_H #define JEMALLOC_INTERNAL_PSSET_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/hpdata.h" /* diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index c7f52f86..ebe69988 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h 
@@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_QL_H #define JEMALLOC_INTERNAL_QL_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/qr.h" /* diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 343e7c13..5f2771a9 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_RB_H #define JEMALLOC_INTERNAL_RB_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/safety_check.h" + /*- ******************************************************************************* * diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 22f5f9dc..f559c94f 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_RTREE_H #define JEMALLOC_INTERNAL_RTREE_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index e45525c5..59f18570 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_RTREE_CTX_H #define JEMALLOC_INTERNAL_RTREE_CTX_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. 
Each * entry supports an entire leaf, so the cache hit rate is typically high even diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 7854c1bf..ef778dae 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,10 +1,12 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 - +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/pages.h" +#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 8813d6bb..79723965 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_GUARD_H #define JEMALLOC_INTERNAL_GUARD_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/tsd.h" #define SAN_PAGE_GUARD PAGE #define SAN_PAGE_GUARDS_SIZE (SAN_PAGE_GUARD * 2) diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 0a8e76e9..d6e9cfc5 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -1,9 +1,11 @@ #ifndef JEMALLOC_INTERNAL_SAN_BUMP_H #define JEMALLOC_INTERNAL_SAN_BUMP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" #define SBA_RETAINED_ALLOC_SIZE ((size_t)4 << 20) diff --git a/include/jemalloc/internal/sc.h 
b/include/jemalloc/internal/sc.h index 9bab347b..770835cc 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SC_H #define JEMALLOC_INTERNAL_SC_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" /* diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index fa863382..8ef1e9fb 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -1,8 +1,12 @@ #ifndef JEMALLOC_INTERNAL_SEC_H #define JEMALLOC_INTERNAL_SEC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/sec_opts.h" /* * Small extent cache. diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index a3ad72fb..19ed1492 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SEC_OPTS_H #define JEMALLOC_INTERNAL_SEC_OPTS_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The configuration settings used by an sec_t. Morally, this is part of the * SEC interface, but we put it here for header-ordering reasons. 
diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h index ef2df4c6..9bb6b235 100644 --- a/include/jemalloc/internal/seq.h +++ b/include/jemalloc/internal/seq.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SEQ_H #define JEMALLOC_INTERNAL_SEQ_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" /* diff --git a/include/jemalloc/internal/slab_data.h b/include/jemalloc/internal/slab_data.h index e821863d..724c71e3 100644 --- a/include/jemalloc/internal/slab_data.h +++ b/include/jemalloc/internal/slab_data.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SLAB_DATA_H #define JEMALLOC_INTERNAL_SLAB_DATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bitmap.h" typedef struct slab_data_s slab_data_t; diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 6940f15e..87c400d5 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SPIN_H #define JEMALLOC_INTERNAL_SPIN_H +#include "jemalloc/internal/jemalloc_preamble.h" + #define SPIN_INITIALIZER {0U} typedef struct { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 727f7dcb..310178ea 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/tsd_types.h" + /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index a799cea9..955d8ec0 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SIZE_H #define JEMALLOC_INTERNAL_SIZE_H +#include 
"jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 37f61646..af6fd970 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,6 +1,12 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_types.h" + extern bool opt_tcache; extern size_t opt_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2b8db0a3..b69d89ad 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,11 +1,16 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 176d73de..75918158 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -1,11 +1,12 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#include 
"jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/tsd_types.h" /* * The tcache state is split into the slow and hot path data. Each has a diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 583677ea..cea86fb7 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H #define JEMALLOC_INTERNAL_TCACHE_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" typedef struct tcache_slow_s tcache_slow_t; diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 2b90afe1..af3f2755 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" + extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 2f4e1b39..46c57ed5 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H #define JEMALLOC_INTERNAL_THREAD_EVENT_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* "te" is short for "thread_event" */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index de034995..dca9bd10 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define 
JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index e36ffc6d..4f22dcff 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -7,6 +7,7 @@ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_malloc_thread_cleanup.h" #elif (defined(JEMALLOC_TLS)) #include "jemalloc/internal/tsd_tls.h" diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index c5648f63..aa8042a4 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -3,7 +3,10 @@ #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/ql.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 813580c0..439f1d10 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -3,6 +3,7 @@ #endif #define JEMALLOC_INTERNAL_TSD_INTERNALS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" @@ -12,11 +13,11 @@ #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tsd_types.h" #include 
"jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" -#include "jemalloc/internal/tsd_types.h" /* * Thread-Specific-Data layout diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index a6bd3f58..fb9ea1b4 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index cd0ddbd1..5e5a6e5e 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index a6ae37da..73bbe486 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -3,6 +3,8 @@ #define MALLOC_TSD_CLEANUPS_MAX 4 +#include "jemalloc/internal/jemalloc_preamble.h" + typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; typedef bool (*malloc_tsd_cleanup_t)(void); diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 4efeffb6..8ec7eda7 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_WIN_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" typedef struct { 
bool initialized; diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index dcb1c0a5..939f3891 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_UTIL_H #define JEMALLOC_INTERNAL_UTIL_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" + #define UTIL_INLINE static inline /* Junk fill patterns. */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index fbe5f943..937ca2d5 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_WITNESS_H #define JEMALLOC_INTERNAL_WITNESS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/ql.h" /******************************************************************************/ diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 3f9fc848..aff2d88f 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -1,3 +1,6 @@ +#include "jemalloc/jemalloc_defs.h" +#include "jemalloc/jemalloc_macros.h" + /* * The @je_@ prefix on the following public symbol declarations is an artifact * of namespace management, and should be omitted in application code unless diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 600d993c..f9c506da 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -58,14 +58,7 @@ extern "C" { # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; +# include "jemalloc/internal/jemalloc_preamble.h" # define JEMALLOC_N(n) 
@private_namespace@##n # include "jemalloc/internal/private_namespace.h" diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 7e61df08..440ad9ef 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -2,14 +2,6 @@ #define NTHREADS 10 -static bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; - void * thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 0c64272c..967e0108 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -1,13 +1,5 @@ #include "test/jemalloc_test.h" -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; - void * thd_start(void *arg) { int err; From cdb2c0e02fc303fd56aa525ef63eb71136e62b2d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 14 Jul 2023 13:14:06 -0700 Subject: [PATCH 104/395] Implement C23's `free_sized` and `free_aligned_sized` [N2699 - Sized Memory Deallocation](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2699.htm) introduced two new functions which were incorporated into the C23 standard, `free_sized` and `free_aligned_sized`. Both already have analogues in Jemalloc, all we are doing here is adding the appropriate wrappers. 
--- configure.ac | 12 +++++- doc/jemalloc.xml.in | 42 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 2 + include/jemalloc/jemalloc_protos.h.in | 3 ++ src/jemalloc.c | 17 ++++++++ 5 files changed, 75 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 687b221c..f820d14a 100644 --- a/configure.ac +++ b/configure.ac @@ -1105,7 +1105,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free free_sized free_aligned_sized mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ]) @@ -1129,6 +1129,16 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_free], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ]) wrap_syms="${wrap_syms} __libc_free"]) + dnl __libc_free_sized and __libc_free_aligned_sized are here speculatively + dnl under the assumption that glibc will eventually define symbols with these + dnl names. In the event glibc chooses different names for these symbols, + dnl these will need to be amended to match. 
+ AC_CHECK_FUNC([__libc_free_sized], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_SIZED], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_free_sized"]) + AC_CHECK_FUNC([__libc_free_aligned_sized], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_free_aligned_sized"]) AC_CHECK_FUNC([__libc_malloc], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_malloc"]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 98f86f95..bdebd433 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -33,6 +33,8 @@ aligned_alloc realloc free + free_sized + free_aligned_sized mallocx rallocx xallocx @@ -89,6 +91,17 @@ void free void *ptr + + void free_sized + void *ptr + size_t size + + + void free_aligned_sized + void *ptr + size_t alignment + size_t size + Non-standard API @@ -227,6 +240,17 @@ allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. + + The free_sized() function is an extension of + free() with a size parameter + to allow the caller to pass in the allocation size as an optimization. + + + The free_aligned_sized() function accepts a + ptr which was allocated with a requested + size and alignment, causing + the allocated memory referenced by ptr to be made + available for future allocations. Non-standard API @@ -451,6 +475,24 @@ for (i = 0; i < nbins; i++) { depended on, since such behavior is entirely implementation-dependent. + + Interactions Between the Standard and Non-standard APIs + Generally speaking it is permissible to pass pointers obtained from + the standard API to the non-standard API and vice versa (e.g. calling + free() with a pointer returned by a call to + mallocx(), calling sdallocx() + with a pointer returned by a call to calloc()). + There are however a few exceptions. 
In keeping with the C23 standard – + which forbids calling free_sized() on a pointer + returned by aligned_alloc(), mandating that either + free_aligned_sized() or free() + be used instead – using any combination of the standard and non-standard + APIs in an equivalent fashion (i.e. taking a pointer which was allocated + with an explicitly requested alignment and attempting to free it via an + API that accepts a size hint, without also providing the alignment hint) + is likewise forbidden. + + TUNING diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index cce638d3..7498bc48 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -14,6 +14,8 @@ */ #undef JEMALLOC_OVERRIDE___LIBC_CALLOC #undef JEMALLOC_OVERRIDE___LIBC_FREE +#undef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +#undef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED #undef JEMALLOC_OVERRIDE___LIBC_MALLOC #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN #undef JEMALLOC_OVERRIDE___LIBC_REALLOC diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index aff2d88f..170493dd 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -28,6 +28,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW @je_@free(void *ptr) JEMALLOC_CXX_THROW; +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free_sized(void *ptr, size_t size); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free_aligned_sized( + void *ptr, size_t alignment, size_t size); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8de30279..3961683a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2990,6 +2990,16 @@ je_free(void *ptr) { 
LOG("core.free.exit", ""); } +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free_sized(void *ptr, size_t size) { + return je_sdallocx_noflags(ptr, size); +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { + return je_sdallocx(ptr, size, /* flags */ MALLOCX_ALIGN(alignment)); +} + /* * End malloc(3)-compatible functions. */ @@ -3153,6 +3163,13 @@ void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); # ifdef JEMALLOC_OVERRIDE___LIBC_FREE void __libc_free(void* ptr) PREALIAS(je_free); # endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +void __libc_free_sized(void* ptr, size_t size) PREALIAS(je_free_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED +void __libc_free_aligned_sized( + void* ptr, size_t alignment, size_t size) PREALIAS(je_free_aligned_sized); +# endif # ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC void *__libc_malloc(size_t size) PREALIAS(je_malloc); # endif From c49c17f128cc757c6bd4d026af181f01e28f3b41 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 20 Jul 2023 11:30:59 -0700 Subject: [PATCH 105/395] Suppress verbose frame address warnings These warnings are not useful, and make the output of some CI jobs enormous and difficult to read, so let's suppress them. 
--- include/jemalloc/internal/jemalloc_internal_macros.h | 4 ++++ src/prof_sys.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index e97b5f90..a08b7e7a 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -50,6 +50,7 @@ # define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) # define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -79,6 +80,8 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS # endif +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") # define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ @@ -97,6 +100,7 @@ # define JEMALLOC_DIAGNOSTIC_POP # define JEMALLOC_DIAGNOSTIC_IGNORE(W) # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS diff --git a/src/prof_sys.c b/src/prof_sys.c index 1f8ecb62..dbb4c80a 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -100,6 +100,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { _Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { /* The input arg must be a constant for 
__builtin_return_address. */ @@ -405,6 +407,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(254) BT_FRAME(255) #undef BT_FRAME +JEMALLOC_DIAGNOSTIC_POP } #else static void From 7e54dd1ddb0953093fc640cca9a45897b33cf84d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 21 Jul 2023 18:13:58 -0700 Subject: [PATCH 106/395] Define `PROF_TCTX_SENTINEL` instead of using magic numbers This makes the code more readable on its own, and also sets the stage for more cleanly handling the pointer provenance lints in a following commit. --- include/jemalloc/internal/arena_inlines_b.h | 6 ++--- include/jemalloc/internal/prof_inlines.h | 25 ++++++++++++--------- include/jemalloc/internal/prof_types.h | 2 ++ src/jemalloc.c | 8 +++---- src/large.c | 4 ++-- src/prof.c | 14 +++++++++--- test/unit/prof_tctx.c | 4 ++-- 7 files changed, 39 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c7d08227..44a73373 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -104,15 +104,15 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (reset_recent && large_dalloc_safety_checks(edata, ptr, edata_szind_get(edata))) { - prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { - prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; /* * No need to set other fields in prof_info; they will never be - * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U. + * accessed if alloc_tctx == PROF_TCTX_SENTINEL. 
*/ } } diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 6cb73735..75300ee4 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -106,6 +106,11 @@ prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); } +JEMALLOC_ALWAYS_INLINE bool +prof_tctx_is_valid(const prof_tctx_t *tctx) { + return tctx != NULL && tctx != PROF_TCTX_SENTINEL; +} + JEMALLOC_ALWAYS_INLINE void prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); @@ -126,7 +131,7 @@ JEMALLOC_ALWAYS_INLINE void prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(edata != NULL); - assert((uintptr_t)tctx > (uintptr_t)1U); + assert(prof_tctx_is_valid(tctx)); arena_prof_info_set(tsd, edata, tctx, size); } @@ -161,7 +166,7 @@ prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { if (!prof_active || likely(prof_sample_should_skip(tsd, sample_event))) { - ret = (prof_tctx_t *)(uintptr_t)1U; + ret = PROF_TCTX_SENTINEL; } else { ret = prof_tctx_create(tsd); } @@ -176,7 +181,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, assert(ptr != NULL); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + if (unlikely(prof_tctx_is_valid(tctx))) { prof_malloc_sample_object(tsd, ptr, size, usize, tctx); } else { prof_tctx_reset(tsd, ptr, alloc_ctx); @@ -190,7 +195,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, bool sampled, old_sampled, moved; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + assert(ptr != NULL || !prof_tctx_is_valid(tctx)); if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -203,12 +208,12 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * sample threshold. 
*/ prof_alloc_rollback(tsd, tctx); - tctx = (prof_tctx_t *)(uintptr_t)1U; + tctx = PROF_TCTX_SENTINEL; } } - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); + sampled = prof_tctx_is_valid(tctx); + old_sampled = prof_tctx_is_valid(old_prof_info->alloc_tctx); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -226,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); + assert(prof_info.alloc_tctx == PROF_TCTX_SENTINEL); } /* @@ -258,7 +263,7 @@ JEMALLOC_ALWAYS_INLINE bool prof_sampled(tsd_t *tsd, const void *ptr) { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U; + bool sampled = prof_tctx_is_valid(prof_info.alloc_tctx); if (sampled) { assert(prof_sample_aligned(ptr)); } @@ -274,7 +279,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { + if (unlikely(prof_tctx_is_valid(prof_info.alloc_tctx))) { assert(prof_sample_aligned(ptr)); prof_free_sampled_object(tsd, ptr, usize, &prof_info); } diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 046ea204..921b16fe 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -88,4 +88,6 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_SAMPLE_ALIGNMENT PAGE #define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK +#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 3961683a..a36b4974 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2529,12 +2529,12 @@ imalloc_body(static_opts_t *sopts, 
dynamic_opts_t *dopts, tsd_t *tsd) { sample_event); emap_alloc_ctx_t alloc_ctx; - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + if (likely(tctx == PROF_TCTX_SENTINEL)) { alloc_ctx.slab = sz_can_use_slab(usize); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind, alloc_ctx.slab); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { + } else if (tctx != NULL) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); alloc_ctx.slab = false; @@ -3366,7 +3366,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + if (unlikely(tctx != PROF_TCTX_SENTINEL)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, usize, alignment, zero, tcache, arena, tctx, hook_args); } else { @@ -3612,7 +3612,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + if (unlikely(tctx != PROF_TCTX_SENTINEL)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); } else { diff --git a/src/large.c b/src/large.c index 5fc4bf58..10fa652e 100644 --- a/src/large.c +++ b/src/large.c @@ -287,7 +287,7 @@ large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; - if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { + if (prof_tctx_is_valid(alloc_tctx)) { nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_info->alloc_size = edata_prof_alloc_size_get(edata); @@ -308,7 +308,7 @@ large_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { void large_prof_tctx_reset(edata_t *edata) { - large_prof_tctx_set(edata, (prof_tctx_t *)(uintptr_t)1U); + 
large_prof_tctx_set(edata, PROF_TCTX_SENTINEL); } void diff --git a/src/prof.c b/src/prof.c index 9986a329..52869375 100644 --- a/src/prof.c +++ b/src/prof.c @@ -91,11 +91,19 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); if (tsd_reentrancy_level_get(tsd) > 0) { - assert((uintptr_t)tctx == (uintptr_t)1U); + assert(tctx == PROF_TCTX_SENTINEL); return; } - if ((uintptr_t)tctx > (uintptr_t)1U) { + if (prof_tctx_is_valid(tctx)) { + /* + * This `assert` really shouldn't be necessary. It's here + * because there's a bug in the clang static analyzer; it + * somehow does not realize that by `prof_tctx_is_valid(tctx)` + * being true that we've already ensured that `tctx` is not + * `NULL`. + */ + assert(tctx != NULL); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; prof_tctx_try_destroy(tsd, tctx); @@ -169,7 +177,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, assert(prof_info != NULL); prof_tctx_t *tctx = prof_info->alloc_tctx; - assert((uintptr_t)tctx > (uintptr_t)1U); + assert(prof_tctx_is_valid(tctx)); szind_t szind = sz_size2index(usize); diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index e0efdc36..d19dd395 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -18,7 +18,7 @@ TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_1); expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, @@ -28,7 +28,7 @@ TEST_BEGIN(test_prof_realloc) { expect_ptr_ne(p, q, "Expected move"); expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, 
"Expected valid tctx"); prof_cnt_all(&cnt_2); expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, From 14311536959457d10e9307a580afeb0af1a8838b Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:36:32 -0700 Subject: [PATCH 107/395] Define `SBRK_INVALID` instead of using a magic number --- src/extent_dss.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 0b846296..f8bd8f60 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -8,6 +8,8 @@ /******************************************************************************/ /* Data. */ +#define SBRK_INVALID ((void *)-1) + const char *opt_dss = DSS_DEFAULT; const char *const dss_prec_names[] = { @@ -94,7 +96,7 @@ extent_dss_max_update(void *new_addr) { * up to date. */ void *max_cur = extent_dss_sbrk(0); - if (max_cur == (void *)-1) { + if (max_cur == SBRK_INVALID) { return NULL; } atomic_store_p(&dss_max, max_cur, ATOMIC_RELEASE); @@ -220,7 +222,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * Failure, whether due to OOM or a race with a raw * sbrk() call from outside the allocator. */ - if (dss_prev == (void *)-1) { + if (dss_prev == SBRK_INVALID) { /* OOM. 
*/ atomic_store_b(&dss_exhausted, true, ATOMIC_RELEASE); @@ -270,7 +272,7 @@ extent_dss_boot(void) { dss_base = extent_dss_sbrk(0); atomic_store_b(&dss_extending, false, ATOMIC_RELAXED); - atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED); + atomic_store_b(&dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } From 4827bb17bdd5a25921c5b091ffadf3039d297b17 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:38:42 -0700 Subject: [PATCH 108/395] Remove vestigial `TCACHE_STATE_*` macros --- include/jemalloc/internal/tcache_types.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index cea86fb7..50f1fbcd 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -8,16 +8,6 @@ typedef struct tcache_slow_s tcache_slow_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ #define TCACHE_ZERO_INITIALIZER {0} #define TCACHE_SLOW_ZERO_INITIALIZER {0} From 3e82f357bb218194df5ba1acee39cd6a7d6fe6f6 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:33:36 -0700 Subject: [PATCH 109/395] Fix all optimization-inhibiting integer-to-pointer casts Following from PR #2481, we replace all integer-to-pointer casts [which hide pointer provenance information (and thus inhibit optimizations)](https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html) with equivalent operations that preserve this information. I have enabled the corresponding clang-tidy check in our static analysis CI so that we do not get bitten by this again in the future. --- include/jemalloc/internal/arena_inlines_b.h | 4 +- include/jemalloc/internal/cache_bin.h | 4 +- include/jemalloc/internal/edata.h | 6 +-- .../internal/jemalloc_internal_decls.h | 17 +++++++++ .../internal/jemalloc_internal_types.h | 16 +++++++- include/jemalloc/internal/pages.h | 5 ++- include/jemalloc/internal/prof_types.h | 1 + include/jemalloc/internal/rtree.h | 5 +++ include/jemalloc/internal/safety_check.h | 2 +- include/jemalloc/internal/san.h | 4 +- include/jemalloc/internal/tcache_types.h | 1 + include/jemalloc/internal/util.h | 4 +- scripts/run_static_analysis.sh | 3 +- src/arena.c | 3 +- src/background_thread.c | 2 + src/base.c | 6 +-- src/cache_bin.c | 8 ++-- src/ehooks.c | 8 ++-- src/extent.c | 4 +- src/extent_dss.c | 11 +++--- src/hpdata.c | 4 +- src/jemalloc.c | 4 +- src/large.c | 6 +-- src/pages.c | 4 +- src/prof_data.c | 8 +++- src/san.c | 38 +++++++++---------- src/tcache.c | 4 +- 27 files changed, 116 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 44a73373..b1cd84b4 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -513,7 +513,7 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, 
edata_t *edata, } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); - edata->e_addr = (void *)((uintptr_t)edata->e_addr + + edata->e_addr = (void *)((byte_t *)edata->e_addr + random_offset); assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == edata->e_addr); @@ -599,7 +599,7 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { - bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]); + bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); return shard0 + binshard; } diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 218e368e..2e95c33c 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -247,7 +247,7 @@ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); - uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; + byte_t *empty_bits = (byte_t *)bin->stack_head + diff; void **ret = (void **)empty_bits; assert(ret >= bin->stack_head); @@ -479,7 +479,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. 
*/ uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); - *(void **)((uintptr_t)bin->stack_head - diff) = ptr; + *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); bin->low_bits_full += sizeof(void *); diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 5fe4e14d..baf5187f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -377,18 +377,18 @@ edata_ps_get(const edata_t *edata) { static inline void * edata_before_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) - PAGE); + return (void *)((byte_t *)edata_base_get(edata) - PAGE); } static inline void * edata_last_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) + + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata) - PAGE); } static inline void * edata_past_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) + + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata)); } diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 77ba1c9a..0bca9133 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -105,4 +105,21 @@ isblank(int c) { # undef small #endif +/* + * Oftentimes we'd like to perform some kind of arithmetic to obtain + * a pointer from another pointer but with some offset or mask applied. + * Naively you would accomplish this by casting the source pointer to + * `uintptr_t`, performing all of the relevant arithmetic, and then casting + * the result to the desired pointer type. 
However, this has the unfortunate + * side-effect of concealing pointer provenance, hiding useful information for + * optimization from the compiler (see here for details: + * https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html + * ) + * Instead what one should do is cast the source pointer to `char *` and perform + * the equivalent arithmetic (since `char` of course represents one byte). But + * because `char *` has the semantic meaning of "string", we define this typedef + * simply to make it clearer where we are performing such pointer arithmetic. + */ +typedef char byte_t; + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index b1c48be9..4ab5a0cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -99,7 +99,8 @@ typedef enum malloc_init_e malloc_init_t; /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + ((void *)(((byte_t *)(a)) - (((uintptr_t)(a)) - \ + ((uintptr_t)(a) & ((~(alignment)) + 1))))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -109,6 +110,19 @@ typedef enum malloc_init_e malloc_init_t; #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) +/* + * Return the nearest aligned address at or above a. + * + * While at first glance this would appear to be merely a more complicated + * way to perform the same computation as `ALIGNMENT_CEILING`, + * this has the important additional property of not concealing pointer + * provenance from the compiler. See the block-comment on the + * definition of `byte_t` for more details. 
+ */ +#define ALIGNMENT_ADDR2CEILING(a, alignment) \ + ((void *)(((byte_t *)(a)) + (((((uintptr_t)(a)) + \ + (alignment - 1)) & ((~(alignment)) + 1)) - ((uintptr_t)(a))))) + /* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) # ifdef _MSC_VER diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 0ecc2cd0..b4e9678e 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" /* Actual operating system page size, detected during bootstrap, <= PAGE. */ extern size_t os_page; @@ -14,7 +15,7 @@ extern size_t os_page; #define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the page base address for the page containing address a. */ #define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + ALIGNMENT_ADDR2BASE(a, PAGE) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) @@ -41,7 +42,7 @@ extern size_t os_page; /* Return the huge page base address for the huge page containing address a. */ #define HUGEPAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) + ALIGNMENT_ADDR2BASE(a, HUGEPAGE) /* Return the smallest pagesize multiple that is >= s. 
*/ #define HUGEPAGE_CEILING(s) \ (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 921b16fe..a27f7fb3 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -88,6 +88,7 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_SAMPLE_ALIGNMENT PAGE #define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index f559c94f..f35368ae 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -226,9 +226,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off metadata. */ uintptr_t mask = high_bit_mask & low_bit_mask; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask metadata bits. 
*/ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & low_bit_mask); # endif @@ -270,6 +272,7 @@ JEMALLOC_ALWAYS_INLINE void rtree_contents_encode(rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *bits = (void *)rtree_leaf_elm_bits_encode(contents); /* Suppress spurious warning from static analysis */ if (config_debug) { @@ -320,8 +323,10 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, /* dependent */ true); bits &= ~RTREE_LEAF_STATE_MASK; bits |= state << RTREE_LEAF_STATE_SHIFT; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE); if (elm2 != NULL) { + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE); } #else diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index ef778dae..194b7744 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -31,7 +31,7 @@ compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; const unsigned char *page_end = (const unsigned char *) - ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page); + ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page); return redzone_end < page_end ? 
redzone_end : page_end; } diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 79723965..669f99dd 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -140,7 +140,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, *first = ptr; - *mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1))); + *mid = (void *)((byte_t *)ptr + ((usize >> 1) & ~(ptr_sz - 1))); assert(*first != *mid || usize == ptr_sz); assert((uintptr_t)*first <= (uintptr_t)*mid); @@ -151,7 +151,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, * default the tcache only goes up to the 32K size class, and is usually * tuned lower instead of higher, which makes it less of a concern. */ - *last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk)); + *last = (void *)((byte_t *)ptr + usize - sizeof(uaf_detect_junk)); assert(*first != *last || usize == ptr_sz); assert(*mid != *last || usize <= ptr_sz * 2); assert((uintptr_t)*mid <= (uintptr_t)*last); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 50f1fbcd..a781f5a6 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -16,6 +16,7 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_ENABLED_ZERO_INITIALIZER false /* Used for explicit tcache only. Means flushed but not destroyed. 
*/ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 939f3891..536c0970 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -110,14 +110,14 @@ util_prefetch_write(void *ptr) { JEMALLOC_ALWAYS_INLINE void util_prefetch_read_range(void *ptr, size_t sz) { for (size_t i = 0; i < sz; i += CACHELINE) { - util_prefetch_read((void *)((uintptr_t)ptr + i)); + util_prefetch_read((void *)((byte_t *)ptr + i)); } } JEMALLOC_ALWAYS_INLINE void util_prefetch_write_range(void *ptr, size_t sz) { for (size_t i = 0; i < sz; i += CACHELINE) { - util_prefetch_write((void *)((uintptr_t)ptr + i)); + util_prefetch_write((void *)((byte_t *)ptr + i)); } } diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 4994fe64..70c813d1 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -44,7 +44,8 @@ echo '-**/stdlib.h' > "$skipfile" CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(nproc)" \ --ctu --compile-uniqueing strict --output static_analysis_raw_results \ --analyzers clangsa clang-tidy --skip "$skipfile" \ - --enable readability-inconsistent-declaration-parameter-name + --enable readability-inconsistent-declaration-parameter-name \ + --enable performance-no-int-to-ptr # `--enable` is additive, the vast majority of the checks we want are # enabled by default. 
diff --git a/src/arena.c b/src/arena.c index a8890e57..65eef864 100644 --- a/src/arena.c +++ b/src/arena.c @@ -236,7 +236,7 @@ arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)edata_addr_get(slab) + + ret = (void *)((byte_t *)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); edata_nfree_dec(slab); return ret; @@ -280,6 +280,7 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, while (pop--) { size_t bit = cfs_lu(&g); size_t regind = shift + bit; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *(ptrs + i) = (void *)(base + regsize * regind); i++; diff --git a/src/background_thread.c b/src/background_thread.c index 53b492bb..94d91a89 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -368,6 +368,7 @@ check_background_thread_creation(tsd_t *tsd, pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); @@ -540,6 +541,7 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { * background threads with the underlying pthread_create. 
*/ int err = background_thread_create_signals_masked(&info->thread, NULL, + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); diff --git a/src/base.c b/src/base.c index 16f90495..8e4606d0 100644 --- a/src/base.c +++ b/src/base.c @@ -181,9 +181,9 @@ base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), alignment) - (uintptr_t)edata_addr_get(edata); - ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size); + ret = (void *)((byte_t *)edata_addr_get(edata) + *gap_size); assert(edata_bsize_get(edata) >= *gap_size + size); - edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) + + edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, edata_sn_get(edata)); return ret; @@ -291,7 +291,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, block->next = NULL; assert(block_size >= header_size); base_edata_init(extent_sn_next, &block->edata, - (void *)((uintptr_t)block + header_size), block_size - header_size); + (void *)((byte_t *)block + header_size), block_size - header_size); return block; } diff --git a/src/cache_bin.c b/src/cache_bin.c index a4c22bd7..362605a8 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -50,7 +50,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_preceding_junk; *cur_offset += sizeof(void *); } @@ -58,7 +58,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + *(uintptr_t *)((byte_t *)alloc + 
*cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); } @@ -71,12 +71,12 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, * will access the slots toward higher addresses (for the benefit of * adjacent prefetch). */ - void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); + void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); void *full_position = stack_cur; uint16_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; - void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); + void *empty_position = (void *)((byte_t *)alloc + *cur_offset); /* Init to the empty position. */ bin->stack_head = (void **)empty_position; diff --git a/src/ehooks.c b/src/ehooks.c index da759215..fc2355e6 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -100,7 +100,7 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { - return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_commit((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -112,7 +112,7 @@ ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { - return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_decommit((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -125,7 +125,7 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { - return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_purge_lazy((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -143,7 +143,7 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_FORCED bool 
ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { - return pages_purge_forced((void *)((uintptr_t)addr + + return pages_purge_forced((void *)((byte_t *)addr + (uintptr_t)offset), length); } diff --git a/src/extent.c b/src/extent.c index 477050b6..822c6eee 100644 --- a/src/extent.c +++ b/src/extent.c @@ -743,7 +743,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* A successful commit should return zeroed memory. */ if (config_debug) { void *addr = edata_addr_get(edata); - size_t *p = (size_t *)(uintptr_t)addr; + size_t *p = (size_t *)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { assert(p[i] == 0); @@ -1199,7 +1199,7 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_init(trail, edata_arena_ind_get(edata), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + (void *)((byte_t *)edata_base_get(edata) + size_a), size_b, /* slab */ false, SC_NSIZES, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); diff --git a/src/extent_dss.c b/src/extent_dss.c index f8bd8f60..32fb4112 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -8,6 +8,7 @@ /******************************************************************************/ /* Data. */ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define SBRK_INVALID ((void *)-1) const char *opt_dss = DSS_DEFAULT; @@ -149,10 +150,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * necessary to satisfy alignment. This space can be * recycled for later use. 
*/ - void *gap_addr_page = (void *)(PAGE_CEILING( - (uintptr_t)max_cur)); - void *ret = (void *)ALIGNMENT_CEILING( - (uintptr_t)gap_addr_page, alignment); + void *gap_addr_page = ALIGNMENT_ADDR2CEILING(max_cur, + PAGE); + void *ret = ALIGNMENT_ADDR2CEILING( + gap_addr_page, alignment); size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { @@ -167,7 +168,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * Compute the address just past the end of the desired * allocation space. */ - void *dss_next = (void *)((uintptr_t)ret + size); + void *dss_next = (void *)((byte_t *)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. */ diff --git a/src/hpdata.c b/src/hpdata.c index e7d7294c..3058eafe 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -130,7 +130,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); return (void *)( - (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); + (byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void @@ -277,7 +277,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, } *r_purge_addr = (void *)( - (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); + (byte_t *)hpdata_addr_get(hpdata) + purge_begin * PAGE); *r_purge_size = purge_len * PAGE; purge_state->next_purge_search_begin = purge_begin + purge_len; diff --git a/src/jemalloc.c b/src/jemalloc.c index a36b4974..df0c1ebc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3446,7 +3446,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)p + old_usize); + void *excess_start = (void *)((byte_t *)p + old_usize); junk_alloc_callback(excess_start, excess_len); } @@ -3716,7 +3716,7 @@ 
je_xallocx(void *ptr, size_t size, size_t extra, int flags) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)ptr + old_usize); + void *excess_start = (void *)((byte_t *)ptr + old_usize); junk_alloc_callback(excess_start, excess_len); } label_not_resized: diff --git a/src/large.c b/src/large.c index 10fa652e..d78085f0 100644 --- a/src/large.c +++ b/src/large.c @@ -113,10 +113,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * of CACHELINE in [0 .. PAGE). */ void *zbase = (void *) - ((uintptr_t)edata_addr_get(edata) + old_usize); - void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + ((byte_t *)edata_addr_get(edata) + old_usize); + void *zpast = PAGE_ADDR2BASE((void *)((byte_t *)zbase + PAGE)); - size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + size_t nzero = (byte_t *)zpast - (byte_t *)zbase; assert(nzero > 0); memset(zbase, 0, nzero); } diff --git a/src/pages.c b/src/pages.c index 249d7c5b..58d9cfaf 100644 --- a/src/pages.c +++ b/src/pages.c @@ -197,7 +197,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { static void * os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit) { - void *ret = (void *)((uintptr_t)addr + leadsize); + void *ret = (void *)((byte_t *)addr + leadsize); assert(alloc_size >= leadsize + size); #ifdef _WIN32 @@ -217,7 +217,7 @@ os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, os_pages_unmap(addr, leadsize); } if (trailsize != 0) { - os_pages_unmap((void *)((uintptr_t)ret + size), trailsize); + os_pages_unmap((void *)((byte_t *)ret + size), trailsize); } return ret; #endif diff --git a/src/prof_data.c b/src/prof_data.c index d52522b0..91a9268d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -85,8 +85,10 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { return ret; } +/* 
NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, prof_tctx_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { @@ -100,8 +102,10 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { return ret; } +/* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { @@ -119,8 +123,10 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { return ret; } +/* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, prof_tdata_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ /******************************************************************************/ @@ -1141,7 +1147,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - tdata->vec = (void **)((uintptr_t)tdata + tdata_sz); + tdata->vec = (void **)((byte_t *)tdata + tdata_sz); tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; diff --git a/src/san.c b/src/san.c index 6e512911..28ea3d7c 100644 --- a/src/san.c +++ b/src/san.c @@ -20,43 +20,43 @@ ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; static inline void -san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, - uintptr_t *addr, size_t size, bool left, bool right) { +san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, + void **addr, size_t size, bool left, bool right) { assert(!edata_guarded_get(edata)); assert(size % PAGE == 0); - *addr = (uintptr_t)edata_base_get(edata); + *addr = edata_base_get(edata); if (left) { *guard1 = *addr; - *addr 
+= SAN_PAGE_GUARD; + *addr = ((byte_t *)*addr) + SAN_PAGE_GUARD; } else { - *guard1 = 0; + *guard1 = NULL; } if (right) { - *guard2 = *addr + size; + *guard2 = ((byte_t *)*addr) + size; } else { - *guard2 = 0; + *guard2 = NULL; } } static inline void -san_find_unguarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, - uintptr_t *addr, size_t size, bool left, bool right) { +san_find_unguarded_addr(edata_t *edata, void **guard1, void **guard2, + void **addr, size_t size, bool left, bool right) { assert(edata_guarded_get(edata)); assert(size % PAGE == 0); - *addr = (uintptr_t)edata_base_get(edata); + *addr = edata_base_get(edata); if (right) { - *guard2 = *addr + size; + *guard2 = ((byte_t *)*addr) + size; } else { - *guard2 = 0; + *guard2 = NULL; } if (left) { - *guard1 = *addr - SAN_PAGE_GUARD; - assert(*guard1 != 0); + *guard1 = ((byte_t *)*addr) - SAN_PAGE_GUARD; + assert(*guard1 != NULL); *addr = *guard1; } else { - *guard1 = 0; + *guard1 = NULL; } } @@ -73,16 +73,16 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, ? san_two_side_unguarded_sz(size_with_guards) : san_one_side_unguarded_sz(size_with_guards); - uintptr_t guard1, guard2, addr; + void *guard1, *guard2, *addr; san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, right); assert(edata_state_get(edata) == extent_state_active); - ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); + ehooks_guard(tsdn, ehooks, guard1, guard2); /* Update the guarded addr and usable size of the edata. */ edata_size_set(edata, usize); - edata_addr_set(edata, (void *)addr); + edata_addr_set(edata, addr); edata_guarded_set(edata, true); if (remap) { @@ -108,7 +108,7 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, ? 
san_two_side_guarded_sz(size) : san_one_side_guarded_sz(size); - uintptr_t guard1, guard2, addr; + void *guard1, *guard2, *addr; san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, right); diff --git a/src/tcache.c b/src/tcache.c index fa16732e..914ddb7a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -768,9 +768,9 @@ tcache_create_explicit(tsd_t *tsd) { if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_bin_alloc_size); tcache_slow_t *tcache_slow = - (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t)); + (void *)((byte_t *)mem + tcache_bin_alloc_size + sizeof(tcache_t)); tcache_init(tsd, tcache_slow, tcache, mem); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, From 8ff7e7d6c33fd18a9f8c9f086e027dd0edfc27f0 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 25 Jul 2023 10:42:10 -0700 Subject: [PATCH 110/395] Remove errant `#include`s in public `jemalloc.h` header In an attempt to make all headers self-contained, I inadvertently added `#include`s which refer to intermediate, generated headers that aren't included in the final install. Closes #2489. 
--- include/jemalloc/jemalloc_protos.h.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 170493dd..3e1d3223 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -1,6 +1,3 @@ -#include "jemalloc/jemalloc_defs.h" -#include "jemalloc/jemalloc_macros.h" - /* * The @je_@ prefix on the following public symbol declarations is an artifact * of namespace management, and should be omitted in application code unless From 9ba1e1cb37b84daf00d37936f4223823c2aaac44 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 25 Jul 2023 12:14:35 -0700 Subject: [PATCH 111/395] Make `ctl_arena_clear` slightly more efficient While this function isn't particularly hot, (accounting for just 0.27% of time spent inside the allocator on average across the fleet), looking at the generated assembly and performance profiles does show we're dispatching to multiple different `memset`s when we could instead be just tail-calling `memset` once, reducing code size and marginally improving performance. 
--- src/ctl.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 7d0ab346..454766da 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1042,23 +1042,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->pdirty = 0; ctl_arena->pmuzzy = 0; if (config_stats) { - memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t)); - ctl_arena->astats->allocated_small = 0; - ctl_arena->astats->nmalloc_small = 0; - ctl_arena->astats->ndalloc_small = 0; - ctl_arena->astats->nrequests_small = 0; - ctl_arena->astats->nfills_small = 0; - ctl_arena->astats->nflushes_small = 0; - memset(ctl_arena->astats->bstats, 0, SC_NBINS * - sizeof(bin_stats_data_t)); - memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * - sizeof(arena_stats_large_t)); - memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(pac_estats_t)); - memset(&ctl_arena->astats->hpastats, 0, - sizeof(hpa_shard_stats_t)); - memset(&ctl_arena->astats->secstats, 0, - sizeof(sec_stats_t)); + memset(ctl_arena->astats, 0, sizeof(*(ctl_arena->astats))); } } From b01d49664651f239fdf76774cb6de05ed7e63f4a Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 28 Jul 2023 11:54:27 -0700 Subject: [PATCH 112/395] Add an override for the compile-time malloc_conf to `jemalloc_internal_overrides.h` --- include/jemalloc/internal/jemalloc_internal_overrides.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h index ddd6ee17..5fbbe249 100644 --- a/include/jemalloc/internal/jemalloc_internal_overrides.h +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -13,4 +13,9 @@ #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE #endif +#ifdef JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF + #undef JEMALLOC_CONFIG_MALLOC_CONF + #define JEMALLOC_CONFIG_MALLOC_CONF JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF +#endif + #endif /* JEMALLOC_INTERNAL_OVERRIDES_H 
*/ From 62648c88e5e50b8ed11181a8c42dbc1134d6d854 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 26 Jul 2023 12:25:59 -0700 Subject: [PATCH 113/395] Ensured sampled allocations are properly deallocated during `arena_reset` Sampled allocations were not being demoted before being deallocated during an `arena_reset` operation. --- src/arena.c | 141 ++++++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 65 deletions(-) diff --git a/src/arena.c b/src/arena.c index 65eef864..f330663b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -659,6 +659,76 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } +void +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { + cassert(config_prof); + assert(ptr != NULL); + assert(isalloc(tsdn, ptr) == bumped_usize); + assert(sz_can_use_slab(usize)); + + if (config_opt_safety_checks) { + safety_check_set_redzone(ptr, usize, bumped_usize); + } + + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + + szind_t szind = sz_size2index(usize); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); + + assert(isalloc(tsdn, ptr) == usize); +} + +static size_t +arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { + cassert(config_prof); + assert(ptr != NULL); + size_t usize = isalloc(tsdn, ptr); + size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); + assert(bumped_usize <= SC_LARGE_MINCLASS && + PAGE_CEILING(bumped_usize) == bumped_usize); + assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); + szind_t szind = sz_size2index(bumped_usize); + + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); + + assert(isalloc(tsdn, ptr) == bumped_usize); + + return bumped_usize; +} + +static void +arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path, edata_t 
*edata) { + cassert(config_prof); + assert(opt_prof); + + size_t usize = edata_usize_get(edata); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); + if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { + /* + * Currently, we only do redzoning for small sampled + * allocations. + */ + safety_check_verify_redzone(ptr, usize, bumped_usize); + } + if (bumped_usize >= SC_LARGE_MINCLASS && + bumped_usize <= tcache_maxclass && tcache != NULL) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + sz_size2index(bumped_usize), slow_path); + } else { + large_dalloc(tsdn, edata); + } +} + +void +arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); +} + void arena_reset(tsd_t *tsd, arena_t *arena) { /* @@ -697,7 +767,12 @@ arena_reset(tsd_t *tsd, arena_t *arena) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - large_dalloc(tsd_tsdn(tsd), edata); + if (config_prof && opt_prof && alloc_ctx.szind < SC_NBINS) { + arena_dalloc_promoted_impl(tsd_tsdn(tsd), ptr, + /* tcache */ NULL, /* slow_path */ true, edata); + } else { + large_dalloc(tsd_tsdn(tsd), edata); + } malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1236,70 +1311,6 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } } -void -arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { - cassert(config_prof); - assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == bumped_usize); - assert(sz_can_use_slab(usize)); - - if (config_opt_safety_checks) { - safety_check_set_redzone(ptr, usize, bumped_usize); - } - - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - - szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - emap_remap(tsdn, &arena_emap_global, edata, szind, /* 
slab */ false); - - assert(isalloc(tsdn, ptr) == usize); -} - -static size_t -arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - size_t usize = isalloc(tsdn, ptr); - size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); - assert(bumped_usize <= SC_LARGE_MINCLASS && - PAGE_CEILING(bumped_usize) == bumped_usize); - assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); - szind_t szind = sz_size2index(bumped_usize); - - edata_szind_set(edata, szind); - emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - - assert(isalloc(tsdn, ptr) == bumped_usize); - - return bumped_usize; -} - -void -arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path) { - cassert(config_prof); - assert(opt_prof); - - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - size_t usize = edata_usize_get(edata); - size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); - if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { - /* - * Currently, we only do redzoning for small sampled - * allocations. - */ - safety_check_verify_redzone(ptr, usize, bumped_usize); - } - if (bumped_usize >= SC_LARGE_MINCLASS && - bumped_usize <= tcache_maxclass && tcache != NULL) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(bumped_usize), slow_path); - } else { - large_dalloc(tsdn, edata); - } -} - static void arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ From 6816b238625d67e0bf3b6768f00709051b23f2a6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 1 Aug 2023 14:28:24 -0700 Subject: [PATCH 114/395] Include the unrecognized malloc conf option in the error message. Previously, the option causing trouble would not be printed unless the key:value pair format was found. 
--- src/jemalloc.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index df0c1ebc..ccb20c81 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -865,6 +865,14 @@ malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, return false; } +static void +malloc_conf_format_error(const char *msg, const char *begin, const char *end) { + size_t len = end - begin + 1; + len = len > BUFERROR_BUF ? BUFERROR_BUF : len; + + malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); +} + static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { @@ -898,13 +906,15 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, break; case '\0': if (opts != *opts_p) { - malloc_write(": Conf string ends " - "with key\n"); + malloc_conf_format_error( + "Conf string ends with key", + *opts_p, opts - 1); had_conf_error = true; } return true; default: - malloc_write(": Malformed conf string\n"); + malloc_conf_format_error( + "Malformed conf string", *opts_p, opts); had_conf_error = true; return true; } @@ -922,8 +932,9 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, * comma if one exists. */ if (*opts == '\0') { - malloc_write(": Conf string ends " - "with comma\n"); + malloc_conf_format_error( + "Conf string ends with comma", + *opts_p, opts - 1); had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; From ea5b7bea3144cd26a63510016d778eab3ca58822 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jul 2023 12:49:10 -0700 Subject: [PATCH 115/395] Add configuration option controlling DSS support In many environments, the fallback `sbrk(2)` allocation path is never used even if the system supports the syscall; if you're at the point where `mmap(2)` is failing, `sbrk(2)` is unlikely to succeed. 
Without changing the default, I've added the ability to disable the usage of DSS altogether, so that you do not need to pay for the additional code size and handful of extra runtime branches in such environments. --- configure.ac | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f820d14a..c1ad9e66 100644 --- a/configure.ac +++ b/configure.ac @@ -1469,6 +1469,18 @@ if test "x$zero_realloc_default_free" = "x1" ; then AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ]) fi +dnl Support allocation from DSS by default +AC_ARG_ENABLE([dss], + [AS_HELP_STRING([--disable-dss], [Disable usage of sbrk(2)])], +[if test "x$enable_dss" = "xno" ; then + enable_dss="0" +else + enable_dss="1" +fi +], +[enable_dss="1"] +) + dnl Enable allocation from DSS if supported by the OS. have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. @@ -1482,7 +1494,7 @@ else have_dss="0" fi -if test "x$have_dss" = "x1" ; then +if test "x$have_dss" = "x1" -a "x$enable_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ], [ ]) fi @@ -2791,4 +2803,5 @@ AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) +AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([===============================================================================]) From 07a2eab3ed5dd76657ee689326acd9ecaf1e2830 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 19 Jul 2023 12:30:12 -0700 Subject: [PATCH 116/395] Stop over-reporting memory usage from sampled small allocations @interwq noticed [while reviewing an earlier PR](https://github.com/jemalloc/jemalloc/pull/2478#discussion_r1256217261) that I missed modifying this statistics accounting in line with the rest of the changes from #2459. 
This is now fixed, such that sampled small allocations increment the `.nmalloc`/`.ndalloc` of their effective bin size instead of over-reporting memory usage by attributing all such allocations to `SC_LARGE_MINCLASS`. --- src/arena.c | 54 +++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index f330663b..fe5874a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -293,34 +293,48 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { - szind_t index, hindex; - cassert(config_stats); + szind_t index = sz_size2index(usize); + /* This only occurs when we have a sampled small allocation */ if (usize < SC_LARGE_MINCLASS) { - usize = SC_LARGE_MINCLASS; + assert(index < SC_NBINS); + assert(usize >= PAGE && usize % PAGE == 0); + bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nmalloc++; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + assert(index >= SC_NBINS); + szind_t hindex = index - SC_NBINS; + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].nmalloc, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - index = sz_size2index(usize); - hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].nmalloc, 1); } static void arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { - szind_t index, hindex; - cassert(config_stats); + szind_t index = sz_size2index(usize); + /* This only occurs when we have a sampled small allocation */ if (usize < SC_LARGE_MINCLASS) { - usize = SC_LARGE_MINCLASS; + assert(index < SC_NBINS); + assert(usize >= PAGE && usize % PAGE == 0); + bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.ndalloc++; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + assert(index >= SC_NBINS); + szind_t hindex = index - SC_NBINS; + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].ndalloc, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - index = sz_size2index(usize); - hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].ndalloc, 1); } static void @@ -344,9 +358,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (edata != NULL) { if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -360,10 +372,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_dalloc_stats_update(tsdn, arena, edata_usize_get(edata)); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -373,9 +383,7 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t usize = edata_usize_get(edata); if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -385,9 +393,7 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t usize = edata_usize_get(edata); if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } From 162ff8365da9bc30f3dcddf0e02c7b7c40197bfc Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 4 Aug 2023 14:22:35 -0700 Subject: [PATCH 117/395] Update the Ubuntu version used by Travis CI Update from Ubuntu Focal Fossa to Ubuntu Jammy Jellyfish. Staying up to date is always good, but I'm also hoping that perhaps this newer release contains fixes so that PowerPC VMs don't randomly hang indefinitely while booting anymore, stalling our CI pipeline. 
--- .travis.yml | 2 +- scripts/gen_travis.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 49e6aa7e..85e0b720 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ # Differences are explained here: # https://docs.travis-ci.com/user/languages/minimal-and-generic/ language: minimal -dist: focal +dist: jammy jobs: include: diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index b49905f9..fe4e029f 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -24,7 +24,7 @@ TRAVIS_TEMPLATE = """\ # Differences are explained here: # https://docs.travis-ci.com/user/languages/minimal-and-generic/ language: minimal -dist: focal +dist: jammy jobs: include: From 120abd703addce50fb9105ee4f7e42c3612c3774 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 3 Aug 2023 15:05:10 -0700 Subject: [PATCH 118/395] Add support for the `deprecated` attribute This is useful for enforcing the usage of getter/setter functions to access fields which are considered private or have unique access constraints. --- configure.ac | 24 +++++++++++++++++++ .../internal/jemalloc_internal_macros.h | 14 +++++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ include/jemalloc/jemalloc_macros.h.in | 7 ++++++ 4 files changed, 48 insertions(+) diff --git a/configure.ac b/configure.ac index c1ad9e66..ff493e1d 100644 --- a/configure.ac +++ b/configure.ac @@ -988,6 +988,30 @@ if test "x${je_cv_cold}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ]) fi +dnl Check for deprecated attribute support. 
+JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Wdeprecated-declarations]) +JE_COMPILABLE([deprecated attribute], + [#if !__has_attribute(deprecated) + #error "deprecated attribute not supported" + #endif + struct has_deprecated_field { + int good; + int __attribute__((deprecated("Do not use"))) bad; + }; + ], + [struct has_deprecated_field instance; + instance.good = 0; + instance.bad = 1; + ], + [je_cv_deprecated]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_deprecated}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_DEPRECATED], [ ], [ ]) + JE_CFLAGS_ADD([-Wdeprecated-declarations]) + JE_CXXFLAGS_ADD([-Wdeprecated-declarations]) +fi + dnl Check for VM_MAKE_TAG for mmap support. JE_COMPILABLE([vm_make_tag], [#include diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a08b7e7a..9abcbb20 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -53,6 +53,7 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* #pragma GCC diagnostic first appeared in gcc 4.6. 
*/ #elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ @@ -92,6 +93,12 @@ # else # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# endif # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ JEMALLOC_DIAGNOSTIC_PUSH \ JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER @@ -103,9 +110,16 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif +#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + __VA_ARGS__ \ + JEMALLOC_DIAGNOSTIC_POP + /* * Disables spurious diagnostics for all headers. Since these headers are not * included by users directly, it does not affect their diagnostic settings. diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 77d9d3b5..ef04e756 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -19,6 +19,9 @@ /* Defined if cold attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_COLD +/* Defined if deprecated attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_DEPRECATED + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. 
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 05d996be..a0679af5 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -86,6 +86,7 @@ # define JEMALLOC_ALLOCATOR # endif # define JEMALLOC_COLD +# define JEMALLOC_WARN_ON_USAGE(warning_message) #elif defined(JEMALLOC_HAVE_ATTR) # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) @@ -126,6 +127,11 @@ # else # define JEMALLOC_COLD # endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_WARN_ON_USAGE(warning_message) JEMALLOC_ATTR(deprecated(warning_message)) +# else +# define JEMALLOC_WARN_ON_USAGE(warning_message) +# endif #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) @@ -140,6 +146,7 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR # define JEMALLOC_COLD +# define JEMALLOC_WARN_ON_USAGE(warning_message) #endif #if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__linux__) && !defined(__GLIBC__))) && !defined(JEMALLOC_NO_RENAME) From 424dd61d57500712fad7371bfd921cb9e3caee22 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 4 Aug 2023 11:43:59 -0700 Subject: [PATCH 119/395] Issue a warning upon directly accessing an arena's bins An arena's bins should normally be accessed via the `arena_get_bin` function, which properly takes into account bin-shards. To ensure that we don't accidentally commit code which incorrectly accesses the bins directly, we mark the field with `__attribute__((deprecated))` with an appropriate warning message, and suppress the warning in the few places where directly accessing the bins is allowed. 
--- include/jemalloc/internal/arena_structs.h | 4 +++- src/arena.c | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 0fffa7eb..6f79be97 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -99,7 +99,9 @@ struct arena_s { * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. */ - bin_t bins[0]; + JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " + "Use `arena_get_bin` instead.") + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/src/arena.c b/src/arena.c index fe5874a4..98907bc1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1700,7 +1700,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. */ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); for (i = 0; i < nbins_total; i++) { - bool err = bin_init(&arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bool err = bin_init(&arena->all_bins[i]); + ) if (err) { goto label_error; } @@ -1849,7 +1851,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } - uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins); + ) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; nbins_total += bin_infos[i].n_shards; @@ -1904,14 +1908,18 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < nbins_total; i++) { - bin_prefork(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_prefork(tsdn, &arena->all_bins[i]); + ) } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < nbins_total; i++) { - 
bin_postfork_parent(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_parent(tsdn, &arena->all_bins[i]); + ) } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -1949,7 +1957,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } for (unsigned i = 0; i < nbins_total; i++) { - bin_postfork_child(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_child(tsdn, &arena->all_bins[i]); + ) } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); From 3aae792b1021a3e46490bd52e8b3300c3aa71e82 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 17 Jul 2023 15:22:26 -0700 Subject: [PATCH 120/395] Fix infinite purging loop in HPA As reported in #2449, under certain circumstances it's possible to get stuck in an infinite loop attempting to purge from the HPA. We now handle this by validating the HPA settings at the end of configuration parsing and either normalizing them or aborting depending on if `abort_conf` is set. --- Makefile.in | 1 + .../internal/jemalloc_internal_externs.h | 1 + src/jemalloc.c | 44 ++++++++++++++- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_validate_conf.c | 56 +++++++++++++++++++ test/unit/hpa_validate_conf.sh | 3 + 6 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 test/unit/hpa_validate_conf.c create mode 100644 test/unit/hpa_validate_conf.sh diff --git a/Makefile.in b/Makefile.in index a0131558..3a02b3fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -225,6 +225,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ + $(srcroot)test/unit/hpa_validate_conf.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index ae03c644..64d9aa20 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ 
b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -25,6 +25,7 @@ extern bool opt_junk_alloc; extern bool opt_junk_free; extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE invalid_conf_abort)(void); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; diff --git a/src/jemalloc.c b/src/jemalloc.c index ccb20c81..c5a06f6e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -144,6 +144,7 @@ static void default_junk_free(void *ptr, size_t usize) { void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = false; bool opt_xmalloc = false; @@ -959,7 +960,7 @@ malloc_abort_invalid_conf(void) { assert(opt_abort_conf); malloc_printf(": Abort (abort_conf:true) on invalid conf " "value (see above).\n"); - abort(); + invalid_conf_abort(); } static void @@ -1081,6 +1082,46 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { return ret; } +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa || opt_hpa_opts.dirty_mult == (fxp_t)-1) { + return; + } + size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + + opt_hpa_opts.hugification_threshold; + if (hpa_threshold > HUGEPAGE) { + return; + } + + had_conf_error = true; + char hpa_dirty_mult[FXP_BUF_SIZE]; + char hugification_threshold[FXP_BUF_SIZE]; + char normalization_message[256] = {0}; + fxp_print(opt_hpa_opts.dirty_mult, hpa_dirty_mult); + fxp_print(fxp_div(FXP_INIT_INT((unsigned) + (opt_hpa_opts.hugification_threshold >> LG_PAGE)), + FXP_INIT_INT(HUGEPAGE_PAGES)), hugification_threshold); + if (!opt_abort_conf) { + char normalized_hugification_threshold[FXP_BUF_SIZE]; + opt_hpa_opts.hugification_threshold += + 
HUGEPAGE - hpa_threshold; + fxp_print(fxp_div(FXP_INIT_INT((unsigned) + (opt_hpa_opts.hugification_threshold >> LG_PAGE)), + FXP_INIT_INT(HUGEPAGE_PAGES)), + normalized_hugification_threshold); + malloc_snprintf(normalization_message, + sizeof(normalization_message), ": Normalizing " + "HPA settings to avoid pathological behavior, setting " + "hpa_hugification_threshold_ratio: to %s.\n", + normalized_hugification_threshold); + } + malloc_printf( + ": Invalid combination of options " + "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " + "These values should sum to > 1.0.\n%s", hugification_threshold, + hpa_dirty_mult, normalization_message); +} + static void malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], @@ -1749,6 +1790,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* Re-enable diagnostic "-Wtype-limits" */ JEMALLOC_DIAGNOSTIC_POP } + validate_hpa_settings(); if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 65a56a08..33b70e19 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/hpa_validate_conf.c b/test/unit/hpa_validate_conf.c new file mode 100644 index 00000000..8c1847ba --- /dev/null +++ b/test/unit/hpa_validate_conf.c @@ -0,0 +1,56 @@ +#include "test/jemalloc_test.h" + +static bool abort_called = false; +static void (*default_malloc_message)(void *, const char *); + +static void +mock_invalid_conf_abort(void) { + abort_called = true; +} + +static void +null_malloc_message(void *_1, const char* _2) { +} + 
+TEST_BEGIN(test_hpa_validate_conf) { + test_skip_if(!hpa_supported()); + void *ptr = malloc(4096); + /* Need to restore this here to see any possible assert messages */ + malloc_message = default_malloc_message; + assert_true(abort_called, + "Should have aborted due to invalid values for hpa_dirty_mult and " + "hpa_hugification_threshold_ratio"); + free(ptr); +} +TEST_END + +/* + * We have to set `abort_conf:true` here and not via the `MALLOC_CONF` + * environment variable in the associated shell script for this test. This is + * because when testing on FreeBSD (where Jemalloc is the system allocator) in + * CI configs where HPA is not supported, setting `abort_conf:true` there would + * result in the system Jemalloc picking this up and aborting before we could + * ever even launch the test. + */ +const char *malloc_conf = "abort_conf:true"; + +int +main(void) { + /* + * OK, this is a sort of nasty hack. We don't want to add *another* + * config option for HPA (the intent is that it becomes available on + * more platforms over time, and we're trying to prune back config + * options generally. But we'll get initialization errors on other + * platforms if we set hpa:true in the MALLOC_CONF (even if we set + * abort_conf:false as well). So we reach into the internals and set + * them directly, but only if we know that we're actually going to do + * something nontrivial in the tests. 
+ */ + if (hpa_supported()) { + default_malloc_message = malloc_message; + malloc_message = null_malloc_message; + opt_hpa = true; + invalid_conf_abort = mock_invalid_conf_abort; + } + return test_no_reentrancy(test_hpa_validate_conf); +} diff --git a/test/unit/hpa_validate_conf.sh b/test/unit/hpa_validate_conf.sh new file mode 100644 index 00000000..692c3da9 --- /dev/null +++ b/test/unit/hpa_validate_conf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF='tcache:false,hpa_dirty_mult:0.25,hpa_hugification_threshold_ratio:0.6' From 4f50f782fa8e48248684e9f479b895fe19609635 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 27 Jul 2023 11:49:07 -0700 Subject: [PATCH 121/395] Use compiler-provided assume builtins when available There are several benefits to this: 1. It's cleaner and more reliable to use the builtin to inform the compiler of assumptions instead of hoping that the optimizer understands your intentions. 2. `clang` will warn you if any of your assumptions would produce side-effects (which the compiler will discard). [This blog post](https://fastcompression.blogspot.com/2019/01/compiler-checked-contracts.html) by Yann Collet highlights that a hazard of using the `unreachable()`-based method of signaling assumptions is that it can sometimes result in additional instructions being generated (see [this Godbolt link](https://godbolt.org/z/lKNMs3) from the blog post for an example). 
--- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/util.h | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b1cd84b4..1c98ffa0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -378,7 +378,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (caller_alloc_ctx != NULL) { alloc_ctx = *caller_alloc_ctx; } else { - util_assume(!tsdn_null(tsdn)); + util_assume(tsdn != NULL); emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 536c0970..2c35ef76 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -65,12 +65,19 @@ get_errno(void) { #endif } -JEMALLOC_ALWAYS_INLINE void -util_assume(bool b) { - if (!b) { - unreachable(); - } -} +#ifdef _MSC_VER +#define util_assume __assume +#elif defined(__clang__) && (__clang_major__ > 3 || \ + (__clang_major__ == 3 && __clang_minor__ >= 6)) +#define util_assume __builtin_assume +#else +#define util_assume(expr) \ + do { \ + if (!(expr)) { \ + unreachable(); \ + } \ + } while(0) +#endif /* ptr should be valid. */ JEMALLOC_ALWAYS_INLINE void From 254c4847e8ac263d24720aa93c2c7d410f55a239 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sat, 5 Aug 2023 13:40:23 -0700 Subject: [PATCH 122/395] Print colorful reminder for failed tests. 
--- test/src/test.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/src/test.c b/test/src/test.c index 8b69d74a..a21356d5 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -173,13 +173,19 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { } } - malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", + bool colored = test_counts[test_status_fail] != 0 && + isatty(STDERR_FILENO); + const char *color_start = colored ? "\033[1;31m" : ""; + const char *color_end = colored ? "\033[0m" : ""; + malloc_printf("%s--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n%s", + color_start, test_status_string(test_status_pass), test_counts[test_status_pass], test_count, test_status_string(test_status_skip), test_counts[test_status_skip], test_count, test_status_string(test_status_fail), - test_counts[test_status_fail], test_count); + test_counts[test_status_fail], test_count, + color_end); return ret; } @@ -229,7 +235,12 @@ p_test_no_malloc_init(test_t *t, ...) { void p_test_fail(bool may_abort, const char *prefix, const char *message) { - malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); + bool colored = test_counts[test_status_fail] != 0 && + isatty(STDERR_FILENO); + const char *color_start = colored ? "\033[1;31m" : ""; + const char *color_end = colored ? "\033[0m" : ""; + malloc_cprintf(NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, + color_end); test_status = test_status_fail; if (may_abort) { abort(); From d2c9ed3d1e7c1a318e6fd018eb0e0f3ba5ee3365 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 10 Aug 2023 10:43:42 -0700 Subject: [PATCH 123/395] Ensure short `read(2)`s/`write(2)`s are properly handled by IO utilities `read(2)` and `write(2)` may read or write fewer bytes than were requested. In order to robustly ensure that all of the requested bytes are read/written, these edge-cases must be handled. 
--- include/jemalloc/internal/malloc_io.h | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 0afb0429..91e7b2ba 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -68,7 +68,7 @@ void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); static inline ssize_t -malloc_write_fd(int fd, const void *buf, size_t count) { +malloc_write_fd_syscall(int fd, const void *buf, size_t count) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid @@ -90,7 +90,22 @@ malloc_write_fd(int fd, const void *buf, size_t count) { } static inline ssize_t -malloc_read_fd(int fd, void *buf, size_t count) { +malloc_write_fd(int fd, const void *buf, size_t count) { + size_t bytes_written = 0; + do { + ssize_t result = malloc_write_fd_syscall(fd, + &((const byte_t *)buf)[bytes_written], + count - bytes_written); + if (result < 0) { + return result; + } + bytes_written += result; + } while (bytes_written < count); + return bytes_written; +} + +static inline ssize_t +malloc_read_fd_syscall(int fd, void *buf, size_t count) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) long result = syscall(SYS_read, fd, buf, count); #else @@ -103,4 +118,20 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { + size_t bytes_read = 0; + do { + ssize_t result = malloc_read_fd_syscall(fd, + &((byte_t *)buf)[bytes_read], count - bytes_read); + if (result < 0) { + return result; + } else if (result == 0) { + break; + } + bytes_read += result; + } while (bytes_read < count); + return bytes_read; +} + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ From 
da66aa391f853ccf2300845b3873cc8f1cf48f2d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 10 Aug 2023 16:31:35 -0700 Subject: [PATCH 124/395] Enable a few additional warnings for CI and fix the issues they uncovered - `-Wmissing-prototypes` and `-Wmissing-variable-declarations` are helpful for finding dead code and/or things that should be `static` but aren't marked as such. - `-Wunused-macros` is of similar utility, but for identifying dead macros. - `-Wunreachable-code` and `-Wunreachable-code-aggressive` do exactly what they say: flag unreachable code. --- include/jemalloc/jemalloc_protos.h.in | 1 + scripts/run_static_analysis.sh | 10 +++++++++- src/ctl.c | 25 ------------------------- src/inspect.c | 1 + src/jemalloc.c | 3 +-- src/jemalloc_cpp.cpp | 1 - src/mutex.c | 2 +- src/pa.c | 7 ------- src/pages.c | 2 +- src/prof_sys.c | 1 - test/src/mtx.c | 2 +- 11 files changed, 15 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 3e1d3223..e474930f 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -4,6 +4,7 @@ * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). 
*/ extern JEMALLOC_EXPORT const char *@je_@malloc_conf; +extern JEMALLOC_EXPORT const char *@je_@malloc_conf_2_conf_harder; extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, const char *s); diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 70c813d1..e2185ec9 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -12,8 +12,16 @@ compile_time_malloc_conf='background_thread:true,'\ 'zero_realloc:free,'\ 'prof_unbias:false,'\ 'prof_time_resolution:high' +extra_flags=( + -Wmissing-prototypes + -Wmissing-variable-declarations + -Wstrict-prototypes + -Wunreachable-code + -Wunreachable-code-aggressive + -Wunused-macros +) -EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autogen.sh \ +EXTRA_CFLAGS="${extra_flags[*]}" EXTRA_CXXFLAGS="${extra_flags[*]}" ./autogen.sh \ --with-private-namespace=jemalloc_ \ --disable-cache-oblivious \ --enable-prof \ diff --git a/src/ctl.c b/src/ctl.c index 454766da..2607aed2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1861,31 +1861,6 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. 
*/ -#define CTL_RO_CLGEN(c, l, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - if (l) { \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - } \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - if (l) { \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - } \ - return ret; \ -} - #define CTL_RO_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ diff --git a/src/inspect.c b/src/inspect.c index 911b5d52..2575b5c1 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/inspect.h" void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, diff --git a/src/jemalloc.c b/src/jemalloc.c index c5a06f6e..e2b4917f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,4 +1,3 @@ -#define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" @@ -151,7 +150,7 @@ bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; bool opt_zero = false; unsigned opt_narenas = 0; -fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 44569c14..08107a8a 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -2,7 +2,6 @@ #include // NOLINTBEGIN(misc-use-anonymous-namespace) -#define JEMALLOC_CPP_CPP_ #ifdef __cplusplus extern "C" { #endif diff --git a/src/mutex.c b/src/mutex.c index 0b3547a8..5655100d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -5,7 +5,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/spin.h" -#ifndef _CRT_SPINCOUNT +#if defined(_WIN32) && 
!defined(_CRT_SPINCOUNT) #define _CRT_SPINCOUNT 4000 #endif diff --git a/src/pa.c b/src/pa.c index 63eef2b5..ebc6861d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -220,13 +220,6 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, pai_dalloc(tsdn, pai, edata, deferred_work_generated); } -bool -pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, - size_t *old_limit, size_t *new_limit) { - return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, - new_limit); -} - bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness) { diff --git a/src/pages.c b/src/pages.c index 58d9cfaf..8cf2fd9f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -21,7 +21,7 @@ #else #define PAGES_FD_TAG -1 #endif -#ifdef JEMALLOC_HAVE_PRCTL +#if defined(JEMALLOC_HAVE_PRCTL) && defined(JEMALLOC_PAGEID) #include #ifndef PR_SET_VMA #define PR_SET_VMA 0x53564d41 diff --git a/src/prof_sys.c b/src/prof_sys.c index dbb4c80a..c2998926 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_SYS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/test/src/mtx.c b/test/src/mtx.c index d9ce375c..6cb3ecd5 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#ifndef _CRT_SPINCOUNT +#if defined(_WIN32) && !defined(_CRT_SPINCOUNT) #define _CRT_SPINCOUNT 4000 #endif From 87c56c8df86107fdf32e92db68211e8b10d94ded Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 25 Aug 2023 11:34:24 -0700 Subject: [PATCH 125/395] Fix arenas.i.bins.j.mutex link id in manual. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bdebd433..d0d4b20b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3307,7 +3307,7 @@ struct extent_hooks_s { Current number of nonfull slabs. 
- + stats.arenas.<i>.bins.<j>.mutex.{counter} (counter specific type) r- From b71da25b8a12c2c3f0c10b0811d15a61980186e8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 28 Aug 2023 10:21:11 -0700 Subject: [PATCH 126/395] Fix reading CPU id using rdtscp. As pointed out in #2527, the correct register containing CPU id should be ecx instead edx. --- include/jemalloc/internal/jemalloc_internal_inlines_a.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 8d5e22fd..111cda42 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -19,9 +19,9 @@ malloc_getcpu(void) { #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); #elif defined(JEMALLOC_HAVE_RDTSCP) - unsigned int ax, cx, dx; - asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); - return (malloc_cpuid_t)(dx & 0xfff); + unsigned int ecx; + asm volatile("rdtscp" : "=c" (ecx) :: "eax", "edx"); + return (malloc_cpuid_t)(ecx & 0xfff); #elif defined(__aarch64__) && defined(__APPLE__) /* Other oses most likely use tpidr_el0 instead */ uintptr_t c; From 7d563a8f8117966d9466d92ed2c782eeae7a19eb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 30 Aug 2023 10:15:30 -0700 Subject: [PATCH 127/395] Update safety check message to remove --enable-debug when it's already on. --- src/safety_check.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index 209fdda9..7ffe1f4f 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -7,12 +7,13 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; + char *suggest_debug_build = config_debug ? 
"" : " --enable-debug or"; safety_check_fail(": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " - "deallocation bugs (source address: %p, %s). Suggest building with " - "--enable-debug or address sanitizer for debugging. Abort.\n", - true_size, input_size, ptr, src); + "deallocation bugs (source address: %p, %s). Suggest building with" + "%s address sanitizer for debugging. Abort.\n", + true_size, input_size, ptr, src, suggest_debug_build); } void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { From ed7e6fe71a193ce24d1409d19d2c792f19af6a21 Mon Sep 17 00:00:00 2001 From: BtbN Date: Sun, 20 Aug 2023 16:21:56 +0200 Subject: [PATCH 128/395] Expose private library dependencies via pkg-config When linking statically, these need to be included for linking to succeed. --- jemalloc.pc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 0a377152..3aecfda1 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -10,3 +10,4 @@ URL: https://jemalloc.net/ Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} +Libs.private: @LIBS@ From ce8ce99a4a969e8dd8644d7382126fbb423d9859 Mon Sep 17 00:00:00 2001 From: BtbN Date: Sun, 20 Aug 2023 16:38:09 +0200 Subject: [PATCH 129/395] Expose jemalloc_prefix via pkg-config --- jemalloc.pc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 3aecfda1..b50770d1 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -3,6 +3,7 @@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ install_suffix=@install_suffix@ +jemalloc_prefix=@JEMALLOC_PREFIX@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
From 7d9eceaf3858515cd8774c3fad8e90fe53454e3c Mon Sep 17 00:00:00 2001 From: Evers Chen Date: Sun, 20 Aug 2023 08:11:35 +0000 Subject: [PATCH 130/395] Fix array bounds false warning in gcc 12.3.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.error: array subscript 232 is above array bounds of ‘size_t[232]’ in gcc 12.3.0 2.it also optimizer to the code --- test/unit/size_classes.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index c70eb592..9e8a408f 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -24,7 +24,7 @@ get_max_size_class(void) { TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; - szind_t index, max_index; + szind_t index, gen_index, max_index; max_size_class = get_max_size_class(); max_index = sz_size2index(max_size_class); @@ -32,6 +32,7 @@ TEST_BEGIN(test_size_classes) { for (index = 0, size_class = sz_index2size(index); index < max_index || size_class < max_size_class; index++, size_class = sz_index2size(index)) { + gen_index = sz_size2index(size_class); expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); @@ -39,17 +40,15 @@ TEST_BEGIN(test_size_classes) { "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - expect_u_eq(index, sz_size2index(size_class), + expect_u_eq(index, gen_index, "sz_size2index() does not reverse sz_index2size(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", - index, size_class, sz_size2index(size_class), - sz_index2size(sz_size2index(size_class))); - expect_zu_eq(size_class, - sz_index2size(sz_size2index(size_class)), + index, size_class, gen_index, sz_index2size(gen_index)); + + expect_zu_eq(size_class, sz_index2size(gen_index), "sz_index2size() does not reverse sz_size2index(): index=%u" " --> 
size_class=%zu --> index=%u --> size_class=%zu", - index, size_class, sz_size2index(size_class), - sz_index2size(sz_size2index(size_class))); + index, size_class, gen_index, sz_index2size(gen_index)); expect_u_eq(index+1, sz_size2index(size_class+1), "Next size_class does not round up properly"); From fbca96c4332380c5799dcc804365ac6e93d7db2f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 20 Aug 2023 23:28:38 -0700 Subject: [PATCH 131/395] Remove unnecessary parameters for cache_bin_postincrement. --- include/jemalloc/internal/cache_bin.h | 3 +-- src/cache_bin.c | 3 +-- src/tcache.c | 3 +-- test/unit/cache_bin.c | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2e95c33c..e6313144 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -691,8 +691,7 @@ void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); -void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, - void *alloc, size_t *cur_offset); +void cache_bin_postincrement(void *alloc, size_t *cur_offset); void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); diff --git a/src/cache_bin.c b/src/cache_bin.c index 362605a8..5fb5607a 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -56,8 +56,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, } void -cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, - size_t *cur_offset) { +cache_bin_postincrement(void *alloc, size_t *cur_offset) { *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); diff --git a/src/tcache.c b/src/tcache.c index 914ddb7a..e9cf2ee5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -698,8 +698,7 @@ 
tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(tcache_small_bin_disabled(i, cache_bin)); } - cache_bin_postincrement(tcache_bin_info, nhbins, mem, - &cur_offset); + cache_bin_postincrement(mem, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 3b6dbab3..50d51a6d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -93,7 +93,7 @@ test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { size_t cur_offset = 0; cache_bin_preincrement(info, 1, mem, &cur_offset); cache_bin_init(bin, info, mem, &cur_offset); - cache_bin_postincrement(info, 1, mem, &cur_offset); + cache_bin_postincrement(mem, &cur_offset); assert_zu_eq(cur_offset, size, "Should use all requested memory"); } From a442d9b895935ac872e7ccc705213537bc747c19 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 6 Aug 2023 11:38:30 -0700 Subject: [PATCH 132/395] Enable per-tcache tcache_max 1. add tcache_max and nhbins into tcache_t so that they are per-tcache, with one auto tcache per thread, it's also per-thread; 2. add mallctl for each thread to set its own tcache_max (of its auto tcache); 3. store the maximum number of items in each bin instead of using a global storage; 4. add tests for the modifications above. 5. Rename `nhbins` and `tcache_maxclass` to `global_do_not_change_nhbins` and `global_do_not_change_tcache_maxclass`. 
--- include/jemalloc/internal/arena_inlines_b.h | 7 +- include/jemalloc/internal/cache_bin.h | 3 + .../internal/jemalloc_internal_inlines_b.h | 1 + .../internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/tcache_externs.h | 19 +- include/jemalloc/internal/tcache_inlines.h | 83 ++++- include/jemalloc/internal/tcache_structs.h | 2 + include/jemalloc/internal/tcache_types.h | 2 +- src/arena.c | 14 +- src/cache_bin.c | 1 + src/ctl.c | 40 ++- src/jemalloc.c | 14 +- src/tcache.c | 319 ++++++++++-------- test/unit/batch_alloc.c | 2 +- test/unit/tcache_max.c | 241 ++++++++++--- 15 files changed, 528 insertions(+), 222 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 1c98ffa0..c4d1c887 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,11 +198,11 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(size <= tcache_maxclass)) { + } else if (likely(size <= tcache_max_get(tcache))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } - /* (size > tcache_maxclass) case falls through. */ + /* (size > tcache_max) case falls through. 
*/ } return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); @@ -297,7 +297,8 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { - if (szind < nhbins) { + assert (!tsdn_null(tsdn) && tcache != NULL); + if (szind < tcache_nhbins_get(tcache)) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index e6313144..4cfc3f1d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -125,6 +125,9 @@ struct cache_bin_s { * array. Immutable after initialization. */ uint16_t low_bits_empty; + + /* The maximum number of cached items in the bin. */ + cache_bin_info_t bin_info; }; /* diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index b2cab228..2ddb4a89 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -23,6 +23,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + assert(tcache_slow->arena != NULL); tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, newarena); } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 1dac668a..8b80e3c1 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -530,7 +530,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* * Currently the fastpath only handles small sizes. The branch on * SC_LOOKUP_MAXCLASS makes sure of it. 
This lets us avoid checking - * tcache szind upper limit (i.e. tcache_maxclass) as well. + * tcache szind upper limit (i.e. tcache_max) as well. */ assert(alloc_ctx.slab); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index af6fd970..87d243a1 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -21,14 +21,19 @@ extern unsigned opt_lg_tcache_flush_large_div; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more - * large-object bins. + * large-object bins. This is only used during threads initialization and + * changing it will not reflect on initialized threads as expected. Thus, + * it should not be changed on the fly. To change the number of tcache bins + * in use, refer to tcache_nhbins of each tcache. */ -extern unsigned nhbins; +extern unsigned global_do_not_change_nhbins; -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -extern cache_bin_info_t *tcache_bin_info; +/* + * Maximum cached size class. Same as above, this is only used during threads + * initialization and should not be changed. To change the maximum cached size + * class, refer to tcache_max of each tcache. 
+ */ +extern size_t global_do_not_change_tcache_maxclass; /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and @@ -65,7 +70,7 @@ void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); void tcache_flush(tsd_t *tsd); -bool tsd_tcache_data_init(tsd_t *tsd); +bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena); bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_assert_initialized(tcache_t *tcache); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index b69d89ad..97501ee2 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -23,7 +23,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd); + tsd_tcache_data_init(tsd, NULL); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -32,13 +32,67 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } +static inline unsigned +tcache_nhbins_get(tcache_t *tcache) { + assert(tcache != NULL); + assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX); + return tcache->tcache_nhbins; +} + +static inline size_t +tcache_max_get(tcache_t *tcache) { + assert(tcache != NULL); + assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT); + return tcache->tcache_max; +} + +static inline void +tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) { + assert(tcache != NULL); + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + tcache->tcache_max = tcache_max; + tcache->tcache_nhbins = sz_size2index(tcache_max) + 1; +} + +static inline void +thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) { + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + assert(tcache_max == sz_s2u(tcache_max)); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow; + assert(tcache != NULL); + + bool enabled = 
tcache_available(tsd); + arena_t *assigned_arena; + if (enabled) { + tcache_slow = tcache_slow_get(tsd); + assert(tcache != NULL && tcache_slow != NULL); + assigned_arena = tcache_slow->arena; + /* Shutdown and reboot the tcache for a clean slate. */ + tcache_cleanup(tsd); + } + + /* + * Still set tcache_max and tcache_nhbins of the tcache even if + * the tcache is not available yet because the values are + * stored in tsd_t and are always available for changing. + */ + tcache_max_and_nhbins_set(tcache, tcache_max); + + if (enabled) { + tsd_tcache_data_init(tsd, assigned_arena); + } + + assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1); +} + JEMALLOC_ALWAYS_INLINE bool tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { assert(ind < SC_NBINS); - bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0); - if (ret && bin != NULL) { + assert(bin != NULL); + bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0; + if (ret) { /* small size class but cache bin disabled. 
*/ - assert(ind >= nhbins); assert((uintptr_t)(*bin->stack_head) == cache_bin_preceding_junk); } @@ -46,6 +100,14 @@ tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { return ret; } +JEMALLOC_ALWAYS_INLINE bool +tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) { + assert(ind >= SC_NBINS); + assert(bin != NULL); + return (cache_bin_info_ncached_max(&bin->bin_info) == 0 || + cache_bin_still_zero_initialized(bin)); +} + JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -95,7 +157,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; bool tcache_success; - assert(binind >= SC_NBINS && binind < nhbins); + assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); @@ -118,7 +180,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } else { if (unlikely(zero)) { size_t usize = sz_index2size(binind); - assert(usize <= tcache_maxclass); + assert(usize <= tcache_max_get(tcache)); memset(ret, 0, usize); } @@ -157,7 +219,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, return; } cache_bin_sz_t max = cache_bin_info_ncached_max( - &tcache_bin_info[binind]); + &bin->bin_info); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -169,14 +231,13 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - assert(tcache_salloc(tsd_tsdn(tsd), ptr) - > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= 
tcache_max_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div; + &bin->bin_info) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 75918158..b51e10a7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -55,6 +55,8 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; + unsigned tcache_nhbins; + size_t tcache_max; cache_bin_t bins[TCACHE_NBINS_MAX]; }; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index a781f5a6..a91b3252 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,7 +19,7 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ +#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) diff --git a/src/arena.c b/src/arena.c index 98907bc1..e7fa0971 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,11 +157,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - for (szind_t i = 0; i < nhbins; i++) { + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; cache_bin_sz_t ncached, 
nstashed; cache_bin_nitems_get_remote(cache_bin, - &tcache_bin_info[i], &ncached, &nstashed); + &cache_bin->bin_info, &ncached, &nstashed); + + if ((i < SC_NBINS && + tcache_small_bin_disabled(i, cache_bin)) || + (i >= SC_NBINS && + tcache_large_bin_disabled(i, cache_bin))) { + assert(ncached == 0 && nstashed == 0); + } astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * @@ -720,7 +727,8 @@ arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, safety_check_verify_redzone(ptr, usize, bumped_usize); } if (bumped_usize >= SC_LARGE_MINCLASS && - bumped_usize <= tcache_maxclass && tcache != NULL) { + tcache != NULL && + bumped_usize <= tcache_max_get(tcache)) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { diff --git a/src/cache_bin.c b/src/cache_bin.c index 5fb5607a..03577084 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -82,6 +82,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + cache_bin_info_init(&bin->bin_info, info->ncached_max); cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); diff --git a/src/ctl.c b/src/ctl.c index 2607aed2..5697539a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -66,6 +66,7 @@ CTL_PROTO(epoch) CTL_PROTO(background_thread) CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) +CTL_PROTO(thread_tcache_max) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_peak_read) CTL_PROTO(thread_peak_reset) @@ -371,6 +372,7 @@ CTL_PROTO(stats_mutexes_reset) static const ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("max"), CTL(thread_tcache_max)}, {NAME("flush"), 
CTL(thread_tcache_flush)} }; @@ -2289,6 +2291,40 @@ label_return: return ret; } +static int +thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + size_t oldval; + + /* pointer to tcache_t always exists even with tcache disabled. */ + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + oldval = tcache_max_get(tcache); + READ(oldval, size_t); + + if (newp != NULL) { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + size_t new_tcache_max = oldval; + WRITE(new_tcache_max, size_t); + if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { + new_tcache_max = TCACHE_MAXCLASS_LIMIT; + } + new_tcache_max = sz_s2u(new_tcache_max); + if(new_tcache_max != oldval) { + thread_tcache_max_and_nhbins_set(tsd, new_tcache_max); + } + } + + ret = 0; +label_return: + return ret; +} + static int thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, @@ -3101,9 +3137,9 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) -CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index e2b4917f..7aa6a1cd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4136,15 +4136,13 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { filled += n; } - if (likely(ind < nhbins) && progress < batch) { + 
unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, + /* slow */ true, /* is_alloc */ true); + if (likely(tcache != NULL && + ind < tcache_nhbins_get(tcache)) && progress < batch) { if (bin == NULL) { - unsigned tcache_ind = mallocx_tcache_get(flags); - tcache_t *tcache = tcache_get_from_ind(tsd, - tcache_ind, /* slow */ true, - /* is_alloc */ true); - if (tcache != NULL) { - bin = &tcache->bins[ind]; - } + bin = &tcache->bins[ind]; } /* * If we don't have a tcache bin, we don't want to diff --git a/src/tcache.c b/src/tcache.c index e9cf2ee5..ae68c08b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; -/* tcache_maxclass is set to 32KB by default. */ +/* global_do_not_change_tcache_maxclass is set to 32KB by default. */ size_t opt_tcache_max = ((size_t)1) << 15; /* Reasonable defaults for min and max values. */ @@ -57,16 +57,18 @@ size_t opt_tcache_gc_delay_bytes = 0; unsigned opt_lg_tcache_flush_small_div = 1; unsigned opt_lg_tcache_flush_large_div = 1; -cache_bin_info_t *tcache_bin_info; - -/* Total stack size required (per tcache). Include the padding above. */ -static size_t tcache_bin_alloc_size; -static size_t tcache_bin_alloc_alignment; - -/* Number of cache bins enabled, including both large and small. */ -unsigned nhbins; -/* Max size class to be cached (can be small or large). */ -size_t tcache_maxclass; +/* + * Number of cache bins enabled, including both large and small. This value + * is only used to initialize tcache_nhbins in the per-thread tcache. + * Directly modifying it will not affect threads already launched. + */ +unsigned global_do_not_change_nhbins; +/* + * Max size class to be cached (can be small or large). This value is only used + * to initialize tcache_max in the per-thread tcache. Directly modifying it + * will not affect threads already launched. 
+ */ +size_t global_do_not_change_tcache_maxclass; tcaches_t *tcaches; @@ -127,9 +129,9 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); assert(!tcache_slow->bin_refilled[szind]); size_t nflush = low_water - (low_water >> 2); @@ -152,7 +154,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max(&tcache_bin_info[szind]) + if ((cache_bin_info_ncached_max(&cache_bin->bin_info) >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } @@ -165,9 +167,9 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, (unsigned)(ncached - low_water + (low_water >> 2))); } @@ -187,7 +189,7 @@ tcache_event(tsd_t *tsd) { tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); if (low_water > 0) { if (is_small) { tcache_gc_small(tsd, tcache_slow, tcache, szind); @@ -208,7 +210,7 @@ tcache_event(tsd_t *tsd) { cache_bin_low_water_set(cache_bin); tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == nhbins) { + if (tcache_slow->next_gc_bin == tcache_nhbins_get(tcache)) { tcache_slow->next_gc_bin = 0; 
} } @@ -233,10 +235,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, void *ret; assert(tcache_slow->arena != NULL); - unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) + unsigned nfill = cache_bin_info_ncached_max(&cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, cache_bin, - &tcache_bin_info[binind], binind, nfill); + &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); @@ -318,7 +320,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (small) { assert(binind < SC_NBINS); } else { - assert(binind < nhbins); + assert(binind < tcache_nhbins_get(tcache)); } arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -508,18 +510,18 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[binind]); + &cache_bin->bin_info); assert((cache_bin_sz_t)rem <= ncached); unsigned nflush = ncached - rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], + cache_bin_init_ptr_array_for_flush(cache_bin, &cache_bin->bin_info, &ptrs, nflush); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, + cache_bin_finish_flush(cache_bin, &cache_bin->bin_info, &ptrs, ncached - rem); } @@ -548,7 +550,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { - cache_bin_info_t *info = &tcache_bin_info[binind]; + cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. 
The content of original cached * items remain unchanged -- the stashed items reside on the other end @@ -633,15 +635,31 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_arena_associate(tsdn, tcache_slow, tcache, arena); } +static void +tcache_max_and_nhbins_init(tcache_t *tcache) { + assert(tcache != NULL); + assert(global_do_not_change_tcache_maxclass != 0); + assert(global_do_not_change_nhbins != 0); + tcache->tcache_max = global_do_not_change_tcache_maxclass; + tcache->tcache_nhbins = global_do_not_change_nhbins; + assert(tcache->tcache_nhbins == sz_size2index(tcache->tcache_max) + 1); +} + bool tsd_tcache_enabled_data_init(tsd_t *tsd) { /* Called upon tsd initialization. */ tsd_tcache_enabled_set(tsd, opt_tcache); + /* + * tcache is not available yet, but we need to set up its tcache_max + * and tcache_nhbins in advance. + */ + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_max_and_nhbins_init(tcache); tsd_slow_update(tsd); if (opt_tcache) { /* Trigger tcache init. */ - tsd_tcache_data_init(tsd); + tsd_tcache_data_init(tsd, NULL); } return false; @@ -649,7 +667,7 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem) { + void *mem, cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -660,17 +678,19 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* * We reserve cache bins for all small size classes, even if some may - * not get used (i.e. bins higher than nhbins). This allows the fast - * and common paths to access cache bin metadata safely w/o worrying - * about which ones are disabled. + * not get used (i.e. bins higher than tcache_nhbins). This allows + * the fast and common paths to access cache bin metadata safely w/o + * worrying about which ones are disabled. */ - unsigned n_reserved_bins = nhbins < SC_NBINS ? 
SC_NBINS : nhbins; + unsigned tcache_nhbins = tcache_nhbins_get(tcache); + unsigned n_reserved_bins = tcache_nhbins < SC_NBINS ? SC_NBINS + : tcache_nhbins; memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, nhbins, mem, + cache_bin_preincrement(tcache_bin_info, tcache_nhbins, mem, &cur_offset); - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins; i++) { if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; @@ -682,12 +702,12 @@ &cur_offset); } /* - * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS), - * their cache bins are initialized to a state to safely and efficiently - * fail all fastpath alloc / free, so that no additional check around - * nhbins is needed on fastpath. + * For small size classes beyond tcache_max (i.e. + * tcache_nhbins < NBINS), their cache bins are initialized to a state + * to safely and efficiently fail all fastpath alloc / free, so that + * no additional check around tcache_nhbins is needed on fastpath. */ - for (unsigned i = nhbins; i < SC_NBINS; i++) { + for (unsigned i = tcache_nhbins; i < SC_NBINS; i++) { /* Disabled small bins. */ cache_bin_t *cache_bin = &tcache->bins[i]; void *fake_stack = mem; @@ -699,19 +719,102 @@ } cache_bin_postincrement(mem, &cur_offset); - /* Sanity check that the whole stack is used. */ - assert(cur_offset == tcache_bin_alloc_size); + if (config_debug) { + /* Sanity check that the whole stack is used. 
*/ + size_t size, alignment; + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &size, &alignment); + assert(cur_offset == size); + } +} + +static inline unsigned +tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { + if (szind >= SC_NBINS) { + assert(szind < current_nhbins); + return opt_tcache_nslots_large; + } + unsigned slab_nregs = bin_infos[szind].nregs; + + /* We may modify these values; start with the opt versions. */ + unsigned nslots_small_min = opt_tcache_nslots_small_min; + unsigned nslots_small_max = opt_tcache_nslots_small_max; + + /* + * Clamp values to meet our constraints -- even, nonzero, min < max, and + * suitable for a cache bin size. + */ + if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { + nslots_small_max = CACHE_BIN_NCACHED_MAX; + } + if (nslots_small_min % 2 != 0) { + nslots_small_min++; + } + if (nslots_small_max % 2 != 0) { + nslots_small_max--; + } + if (nslots_small_min < 2) { + nslots_small_min = 2; + } + if (nslots_small_max < 2) { + nslots_small_max = 2; + } + if (nslots_small_min > nslots_small_max) { + nslots_small_min = nslots_small_max; + } + + unsigned candidate; + if (opt_lg_tcache_nslots_mul < 0) { + candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); + } else { + candidate = slab_nregs << opt_lg_tcache_nslots_mul; + } + if (candidate % 2 != 0) { + /* + * We need the candidate size to be even -- we assume that we + * can divide by two and get a positive number (e.g. when + * flushing). 
+ */ + ++candidate; + } + if (candidate <= nslots_small_min) { + return nslots_small_min; + } else if (candidate <= nslots_small_max) { + return candidate; + } else { + return nslots_small_max; + } +} + +static void +tcache_bin_info_compute(cache_bin_info_t *tcache_bin_info, + unsigned tcache_nhbins) { + for (szind_t i = 0; i < tcache_nhbins; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i, + tcache_nhbins); + cache_bin_info_init(&tcache_bin_info[i], ncached_max); + } + for (szind_t i = tcache_nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_info_init(&tcache_bin_info[i], 0); + } } /* Initialize auto tcache (embedded in TSD). */ bool -tsd_tcache_data_init(tsd_t *tsd) { +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); - size_t alignment = tcache_bin_alloc_alignment; - size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); + unsigned tcache_nhbins = tcache_nhbins_get(tcache); + size_t size, alignment; + /* Takes 146B stack space. */ + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; + tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &size, &alignment); + size = sz_sa2u(size, alignment); void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -719,7 +822,7 @@ tsd_tcache_data_init(tsd_t *tsd) { return true; } - tcache_init(tsd, tcache_slow, tcache, mem); + tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -729,14 +832,15 @@ tsd_tcache_data_init(tsd_t *tsd) { * arena_choose_hard() will re-associate properly. 
*/ tcache_slow->arena = NULL; - arena_t *arena; if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena); } else { - arena = arena_choose(tsd, NULL); + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } /* This may happen if thread.tcache.enabled is used. */ if (tcache_slow->arena == NULL) { tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, @@ -756,21 +860,29 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. */ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + unsigned tcache_nhbins = global_do_not_change_nhbins; + size_t tcache_size, alignment; + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; + tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &tcache_size, &alignment); + + size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); - size = sz_sa2u(size, tcache_bin_alloc_alignment); + size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment, + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((byte_t *)mem + tcache_bin_alloc_size); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); tcache_slow_t *tcache_slow = - (void *)((byte_t *)mem + tcache_bin_alloc_size + sizeof(tcache_t)); - tcache_init(tsd, tcache_slow, tcache, mem); + (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); + tcache_max_and_nhbins_init(tcache); + tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); @@ -783,7 +895,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); @@ -811,7 +923,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; - cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]); + cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); } idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, true); @@ -849,13 +961,9 @@ tcache_cleanup(tsd_t *tsd) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); tcache_destroy(tsd, tcache, true); - if (config_debug) { - /* - * For debug testing only, we want to pretend we're still in the - * zero-initialized state. - */ - memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); - } + /* Make sure all bins used are reinitialized to the clean state. 
*/ + memset(tcache->bins, 0, sizeof(cache_bin_t) * + tcache_nhbins_get(tcache)); } void @@ -863,7 +971,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. */ - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); @@ -986,97 +1094,18 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { } } -static unsigned -tcache_ncached_max_compute(szind_t szind) { - if (szind >= SC_NBINS) { - assert(szind < nhbins); - return opt_tcache_nslots_large; - } - unsigned slab_nregs = bin_infos[szind].nregs; - - /* We may modify these values; start with the opt versions. */ - unsigned nslots_small_min = opt_tcache_nslots_small_min; - unsigned nslots_small_max = opt_tcache_nslots_small_max; - - /* - * Clamp values to meet our constraints -- even, nonzero, min < max, and - * suitable for a cache bin size. - */ - if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { - nslots_small_max = CACHE_BIN_NCACHED_MAX; - } - if (nslots_small_min % 2 != 0) { - nslots_small_min++; - } - if (nslots_small_max % 2 != 0) { - nslots_small_max--; - } - if (nslots_small_min < 2) { - nslots_small_min = 2; - } - if (nslots_small_max < 2) { - nslots_small_max = 2; - } - if (nslots_small_min > nslots_small_max) { - nslots_small_min = nslots_small_max; - } - - unsigned candidate; - if (opt_lg_tcache_nslots_mul < 0) { - candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); - } else { - candidate = slab_nregs << opt_lg_tcache_nslots_mul; - } - if (candidate % 2 != 0) { - /* - * We need the candidate size to be even -- we assume that we - * can divide by two and get a positive number (e.g. when - * flushing). 
- */ - ++candidate; - } - if (candidate <= nslots_small_min) { - return nslots_small_min; - } else if (candidate <= nslots_small_max) { - return candidate; - } else { - return nslots_small_max; - } -} - bool tcache_boot(tsdn_t *tsdn, base_t *base) { - tcache_maxclass = sz_s2u(opt_tcache_max); - assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - nhbins = sz_size2index(tcache_maxclass) + 1; + global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); + assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); + global_do_not_change_nhbins = + sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { return true; } - /* Initialize tcache_bin_info. See comments in tcache_init(). */ - unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; - size_t size = n_reserved_bins * sizeof(cache_bin_info_t); - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size, - CACHELINE); - if (tcache_bin_info == NULL) { - return true; - } - - for (szind_t i = 0; i < nhbins; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i); - cache_bin_info_init(&tcache_bin_info[i], ncached_max); - } - for (szind_t i = nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. 
*/ - cache_bin_info_init(&tcache_bin_info[i], 0); - assert(tcache_small_bin_disabled(i, NULL)); - } - - cache_bin_info_compute_alloc(tcache_bin_info, nhbins, - &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); - return false; } diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 901c52b1..2bd5968e 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -168,7 +168,7 @@ TEST_BEGIN(test_batch_alloc_large) { assert_zu_eq(filled, batch, ""); release_batch(global_ptrs, batch, size); } - size = tcache_maxclass + 1; + size = global_do_not_change_tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); size_t filled = batch_alloc(global_ptrs, batch, size, 0); diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index b1093f40..0a563c2f 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -18,11 +18,10 @@ enum { dalloc_option_end }; -static unsigned alloc_option, dalloc_option; -static size_t tcache_max; +static bool global_test; static void * -alloc_func(size_t sz) { +alloc_func(size_t sz, unsigned alloc_option) { void *ret; switch (alloc_option) { @@ -41,7 +40,7 @@ alloc_func(size_t sz) { } static void -dalloc_func(void *ptr, size_t sz) { +dalloc_func(void *ptr, size_t sz, unsigned dalloc_option) { switch (dalloc_option) { case use_free: free(ptr); @@ -58,10 +57,10 @@ dalloc_func(void *ptr, size_t sz) { } static size_t -tcache_bytes_read(void) { +tcache_bytes_read_global(void) { uint64_t epoch; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)), 0, "Unexpected mallctl() failure"); size_t tcache_bytes; size_t sz = sizeof(tcache_bytes); @@ -72,16 +71,30 @@ tcache_bytes_read(void) { return tcache_bytes; } +static size_t +tcache_bytes_read_local(void) { + size_t tcache_bytes = 0; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tcache_get(tsd); + 
for (szind_t i = 0; i < tcache_nhbins_get(tcache); i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, + &cache_bin->bin_info); + tcache_bytes += ncached * sz_index2size(i); + } + return tcache_bytes; +} static void tcache_bytes_check_update(size_t *prev, ssize_t diff) { - size_t tcache_bytes = tcache_bytes_read(); + size_t tcache_bytes = global_test ? tcache_bytes_read_global(): + tcache_bytes_read_local(); expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); *prev += diff; } static void -test_tcache_bytes_alloc(size_t alloc_size) { +test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, + unsigned alloc_option, unsigned dalloc_option) { expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, "Unexpected tcache flush failure"); @@ -90,65 +103,82 @@ size_t usize = sz_s2u(alloc_size); bool cached = (usize <= tcache_max); ssize_t diff = cached ? usize : 0; - void *ptr1 = alloc_func(alloc_size); - void *ptr2 = alloc_func(alloc_size); + void *ptr1 = alloc_func(alloc_size, alloc_option); + void *ptr2 = alloc_func(alloc_size, alloc_option); - size_t bytes = tcache_bytes_read(); - dalloc_func(ptr2, alloc_size); + size_t bytes = global_test ? 
tcache_bytes_read_global() : + tcache_bytes_read_local(); + dalloc_func(ptr2, alloc_size, dalloc_option); /* Expect tcache_bytes increase after dalloc */ tcache_bytes_check_update(&bytes, diff); - dalloc_func(ptr1, alloc_size); + dalloc_func(ptr1, alloc_size, dalloc_option); /* Expect tcache_bytes increase again */ tcache_bytes_check_update(&bytes, diff); - void *ptr3 = alloc_func(alloc_size); + void *ptr3 = alloc_func(alloc_size, alloc_option); if (cached) { expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr"); } /* Expect tcache_bytes decrease after alloc */ tcache_bytes_check_update(&bytes, -diff); - void *ptr4 = alloc_func(alloc_size); + void *ptr4 = alloc_func(alloc_size, alloc_option); if (cached) { expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr"); } /* Expect tcache_bytes decrease again */ tcache_bytes_check_update(&bytes, -diff); - dalloc_func(ptr3, alloc_size); + dalloc_func(ptr3, alloc_size, dalloc_option); tcache_bytes_check_update(&bytes, diff); - dalloc_func(ptr4, alloc_size); + dalloc_func(ptr4, alloc_size, dalloc_option); tcache_bytes_check_update(&bytes, diff); } static void -test_tcache_max_impl(void) { - size_t sz; +test_tcache_max_impl(size_t target_tcache_max, unsigned alloc_option, + unsigned dalloc_option) { + size_t tcache_max, sz; sz = sizeof(tcache_max); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + if (global_test) { + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_zu_eq(tcache_max, target_tcache_max, + "Global tcache_max not expected"); + } else { + assert_d_eq(mallctl("thread.tcache.max", + (void *)&tcache_max, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(tcache_max, target_tcache_max, + "Current thread's tcache_max not expected"); + } + test_tcache_bytes_alloc(1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(tcache_max - 1, tcache_max, 
alloc_option, + dalloc_option); + test_tcache_bytes_alloc(tcache_max, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(tcache_max + 1, tcache_max, alloc_option, + dalloc_option); - /* opt.tcache_max set to 1024 in tcache_max.sh */ - expect_zu_eq(tcache_max, 1024, "tcache_max not expected"); - - test_tcache_bytes_alloc(1); - test_tcache_bytes_alloc(tcache_max - 1); - test_tcache_bytes_alloc(tcache_max); - test_tcache_bytes_alloc(tcache_max + 1); - - test_tcache_bytes_alloc(PAGE - 1); - test_tcache_bytes_alloc(PAGE); - test_tcache_bytes_alloc(PAGE + 1); + test_tcache_bytes_alloc(PAGE - 1, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(PAGE + 1, tcache_max, alloc_option, + dalloc_option); size_t large; sz = sizeof(large); assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - test_tcache_bytes_alloc(large - 1); - test_tcache_bytes_alloc(large); - test_tcache_bytes_alloc(large + 1); + test_tcache_bytes_alloc(large - 1, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(large, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(large + 1, tcache_max, alloc_option, + dalloc_option); } TEST_BEGIN(test_tcache_max) { @@ -157,26 +187,157 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(opt_prof); test_skip_if(san_uaf_detection_enabled()); - unsigned arena_ind; + unsigned arena_ind, alloc_option, dalloc_option; size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + global_test = true; for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { for (dalloc_option = dalloc_option_start; dalloc_option < dalloc_option_end; dalloc_option++) { 
- test_tcache_max_impl(); + /* opt.tcache_max set to 1024 in tcache_max.sh. */ + test_tcache_max_impl(1024, alloc_option, + dalloc_option); } } + global_test = false; +} +TEST_END + +static size_t +tcache_max2nhbins(size_t tcache_max) { + return sz_size2index(tcache_max) + 1; +} + +static void * +tcache_check(void *arg) { + size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; + unsigned tcache_nhbins; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + sz = sizeof(size_t); + new_tcache_max = *(size_t *)arg; + min_tcache_max = 1; + + /* + * Check the default tcache_max and tcache_nhbins of each thread's + * auto tcache. + */ + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, opt_tcache_max, + "Unexpected default value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, + "Unexpected default value for tcache_nhbins"); + + /* + * Close the tcache and test the set. + * Test an input that is not a valid size class, it should be ceiled + * to a valid size class. 
 */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + + size_t temp_tcache_max = TCACHE_MAXCLASS_LIMIT - 1; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&temp_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, + "Unexpected value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, TCACHE_NBINS_MAX, + "Unexpected value for tcache_nhbins"); + assert_d_eq(mallctl("thread.tcache.max", + (void *)&old_tcache_max, &sz, + (void *)&min_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, + "Unexpected value for tcache_max"); + + /* Enable tcache, the set should still be valid. */ + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + min_tcache_max = sz_s2u(min_tcache_max); + expect_zu_eq(tcache_max_get(tcache), min_tcache_max, + "Unexpected value for tcache_max"); + expect_zu_eq(tcache_nhbins_get(tcache), + tcache_max2nhbins(min_tcache_max), "Unexpected value for nhbins"); + assert_d_eq(mallctl("thread.tcache.max", + (void *)&old_tcache_max, &sz, + (void *)&new_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(old_tcache_max, min_tcache_max, + "Unexpected value for tcache_max"); + + /* + * Check the thread's tcache_max and nhbins both through mallctl + * and alloc tests. 
+ */ + if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { + new_tcache_max = TCACHE_MAXCLASS_LIMIT; + } + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, new_tcache_max, + "Unexpected value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, tcache_max2nhbins(new_tcache_max), + "Unexpected value for tcache_nhbins"); + for (unsigned alloc_option = alloc_option_start; + alloc_option < alloc_option_end; + alloc_option++) { + for (unsigned dalloc_option = dalloc_option_start; + dalloc_option < dalloc_option_end; + dalloc_option++) { + test_tcache_max_impl(new_tcache_max, + alloc_option, dalloc_option); + } + } + + return NULL; +} + +TEST_BEGIN(test_thread_tcache_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); + + unsigned nthreads = 8; + global_test = false; + VARIABLE_ARRAY(thd_t, threads, nthreads); + VARIABLE_ARRAY(size_t, all_threads_tcache_max, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + all_threads_tcache_max[i] = 1024 * (1<<((i + 10) % 20)); + if (i == nthreads - 1) { + all_threads_tcache_max[i] = UINT_MAX; + } + } + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], tcache_check, + &(all_threads_tcache_max[i])); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } } TEST_END int main(void) { - return test(test_tcache_max); + return test( + test_tcache_max, + test_thread_tcache_max); } + From 72cfdce71806443f4ccdbfe10aa5d50346a3d07e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 13 Sep 2023 21:51:54 -0700 Subject: [PATCH 133/395] Allocate tcache stack from base allocator When using metadata_thp, allocate tcache bin stacks from base0, which means they will be placed on huge pages along with other metadata, instead of mixed with other regular allocations. 
In order to do so, modified the base allocator to support limited reuse: freed tcached stacks (from thread termination) will be returned to base0 and made available for reuse, but no merging will be attempted since they were bump allocated out of base blocks. These reused base extents are managed using separately allocated base edata_t -- they are cached in base->edata_avail when the extent is all allocated. One tricky part is, stats updating must be skipped for such reused extents (since they were accounted for already, and there is no purging for base). This requires tracking the "if is reused" state explicitly and bypass the stats updates when allocating from them. --- include/jemalloc/internal/base.h | 5 + include/jemalloc/internal/cache_bin.h | 1 + include/jemalloc/internal/edata.h | 6 +- src/base.c | 151 +++++++++++++++++++++++--- src/cache_bin.c | 18 ++- src/tcache.c | 22 +++- test/unit/tcache_max.c | 26 +++++ 7 files changed, 202 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 6b41aa6f..451be10f 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -73,6 +73,9 @@ struct base_s { /* Heap of extents that track unused trailing space within blocks. */ edata_heap_t avail[SC_NSIZES]; + /* Contains reusable base edata (used by tcache_stacks currently). */ + edata_avail_t edata_avail; + /* Stats, only maintained if config_stats. 
*/ size_t allocated; size_t resident; @@ -101,6 +104,8 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); +void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 4cfc3f1d..78ac3295 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -704,5 +704,6 @@ void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, * not cache_bin_init was called on it. */ bool cache_bin_still_zero_initialized(cache_bin_t *bin); +bool cache_bin_stack_use_thp(void); #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index baf5187f..17befd92 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -621,7 +621,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { +edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn, + bool reused) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -629,7 +630,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_szind_set(edata, SC_NSIZES); edata_sn_set(edata, sn); edata_state_set(edata, extent_state_active); - edata_guarded_set(edata, false); + /* See comments in base_edata_is_reused. 
*/ + edata_guarded_set(edata, reused); edata_zeroed_set(edata, true); edata_committed_set(edata, true); /* diff --git a/src/base.c b/src/base.c index 8e4606d0..e1dfe604 100644 --- a/src/base.c +++ b/src/base.c @@ -110,6 +110,16 @@ label_done: } } +static inline bool +base_edata_is_reused(edata_t *edata) { + /* + * Borrow the guarded bit to indicate if the extent is a recycled one, + * i.e. the ones returned to base for reuse; currently only tcache bin + * stacks. Skips stats updating if so (needed for this purpose only). + */ + return edata_guarded_get(edata); +} + static void base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) { @@ -118,7 +128,7 @@ base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, sn = *extent_sn_next; (*extent_sn_next)++; - edata_binit(edata, addr, size, sn); + edata_binit(edata, addr, size, sn, false /* is_reused */); } static size_t @@ -185,24 +195,57 @@ base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, assert(edata_bsize_get(edata) >= *gap_size + size); edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, - edata_sn_get(edata)); + edata_sn_get(edata), base_edata_is_reused(edata)); return ret; } static void -base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, - void *addr, size_t size) { - if (edata_bsize_get(edata) > 0) { - /* - * Compute the index for the largest size class that does not - * exceed extent's size. - */ - szind_t index_floor = - sz_size2index(edata_bsize_get(edata) + 1) - 1; - edata_heap_insert(&base->avail[index_floor], edata); +base_edata_heap_insert(tsdn_t *tsdn, base_t *base, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &base->mtx); + + size_t bsize = edata_bsize_get(edata); + assert(bsize > 0); + /* + * Compute the index for the largest size class that does not exceed + * extent's size. 
+ */ + szind_t index_floor = sz_size2index(bsize + 1) - 1; + edata_heap_insert(&base->avail[index_floor], edata); +} + +/* + * Only can be called by top-level functions, since it may call base_alloc + * internally when cache is empty. + */ +static edata_t * +base_alloc_base_edata(tsdn_t *tsdn, base_t *base) { + edata_t *edata; + + malloc_mutex_lock(tsdn, &base->mtx); + edata = edata_avail_first(&base->edata_avail); + if (edata != NULL) { + edata_avail_remove(&base->edata_avail, edata); + } + malloc_mutex_unlock(tsdn, &base->mtx); + + if (edata == NULL) { + edata = base_alloc_edata(tsdn, base); } - if (config_stats) { + return edata; +} + +static void +base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, edata_t *edata, + size_t gap_size, void *addr, size_t size) { + if (edata_bsize_get(edata) > 0) { + base_edata_heap_insert(tsdn, base, edata); + } else { + /* Freed base edata_t stored in edata_avail. */ + edata_avail_insert(&base->edata_avail, edata); + } + + if (config_stats && !base_edata_is_reused(edata)) { base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is @@ -224,13 +267,13 @@ base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, } static void * -base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size, +base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); - base_extent_bump_alloc_post(base, edata, gap_size, ret, size); + base_extent_bump_alloc_post(tsdn, base, edata, gap_size, ret, size); return ret; } @@ -384,6 +427,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } + edata_avail_new(&base->edata_avail); + if (config_stats) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); @@ -395,8 +440,12 @@ 
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(base, &block->edata, gap_size, base, + + /* Locking here is only necessary because of assertions. */ + malloc_mutex_lock(tsdn, &base->mtx); + base_extent_bump_alloc_post(tsdn, base, &block->edata, gap_size, base, base_size); + malloc_mutex_unlock(tsdn, &base->mtx); return base; } @@ -457,7 +506,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(base, edata, usize, alignment); + ret = base_extent_bump_alloc(tsdn, base, edata, usize, alignment); if (esn != NULL) { *esn = (size_t)edata_sn_get(edata); } @@ -491,6 +540,74 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { return edata; } +static inline void +b0_alloc_header_size(size_t *header_size, size_t *alignment) { + *alignment = QUANTUM; + *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM : + sizeof(edata_t *); +} + +/* + * Each piece allocated here is managed by a separate edata, because it was bump + * allocated and cannot be merged back into the original base_block. This means + * it's not for general purpose: 1) they are not page aligned, nor page sized, + * and 2) the requested size should not be too small (as each piece comes with + * an edata_t). Only used for tcache bin stack allocation now. + */ +void * +b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { + base_t *base = b0get(); + edata_t *edata = base_alloc_base_edata(tsdn, base); + if (edata == NULL) { + return NULL; + } + + /* + * Reserve room for the header, which stores a pointer to the managing + * edata_t. The header itself is located right before the return + * address, so that edata can be retrieved on dalloc. Bump up to usize + * to improve reusability -- otherwise the freed stacks will be put back + * into the previous size class. 
+ */ + size_t esn, alignment, header_size; + b0_alloc_header_size(&header_size, &alignment); + + size_t alloc_size = sz_s2u(stack_size + header_size); + void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn); + if (addr == NULL) { + edata_avail_insert(&base->edata_avail, edata); + return NULL; + } + + /* Set is_reused: see comments in base_edata_is_reused. */ + edata_binit(edata, addr, alloc_size, esn, true /* is_reused */); + *(edata_t **)addr = edata; + + return (byte_t *)addr + header_size; +} + +void +b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { + /* edata_t pointer stored in header. */ + size_t alignment, header_size; + b0_alloc_header_size(&header_size, &alignment); + + edata_t *edata = *(edata_t **)((byte_t *)tcache_stack - header_size); + void *addr = edata_addr_get(edata); + size_t bsize = edata_bsize_get(edata); + /* Marked as "reused" to avoid double counting stats. */ + assert(base_edata_is_reused(edata)); + assert(addr != NULL && bsize > 0); + + /* Zero out since base_alloc returns zeroed memory. */ + memset(addr, 0, bsize); + + base_t *base = b0get(); + malloc_mutex_lock(tsdn, &base->mtx); + base_edata_heap_insert(tsdn, base, edata); + malloc_mutex_unlock(tsdn, &base->mtx); +} + void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp) { diff --git a/src/cache_bin.c b/src/cache_bin.c index 03577084..2ad2062d 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -14,6 +14,17 @@ cache_bin_info_init(cache_bin_info_t *info, info->ncached_max = (cache_bin_sz_t)ncached_max; } +bool +cache_bin_stack_use_thp(void) { + /* + * If metadata_thp is enabled, allocating tcache stack from the base + * allocator for efficiency gains. The downside, however, is that base + * allocator never purges freed memory, and may cache a fair amount of + * memory after many threads are terminated and not reused. 
+ */ + return metadata_thp_enabled(); +} + void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, size_t *size, size_t *alignment) { @@ -31,10 +42,11 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, } /* - * Align to at least PAGE, to minimize the # of TLBs needed by the - * smaller sizes; also helps if the larger sizes don't get used at all. + * When not using THP, align to at least PAGE, to minimize the # of TLBs + * needed by the smaller sizes; also helps if the larger sizes don't get + * used at all. */ - *alignment = PAGE; + *alignment = cache_bin_stack_use_thp() ? QUANTUM : PAGE; } void diff --git a/src/tcache.c b/src/tcache.c index ae68c08b..2c0a7e2e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" @@ -814,10 +815,17 @@ tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, &size, &alignment); - size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, - true, arena_get(TSDN_NULL, 0, true)); + void *mem; + if (cache_bin_stack_use_thp()) { + /* Alignment is ignored since it comes from THP. 
*/ + assert(alignment == QUANTUM); + mem = b0_alloc_tcache_stack(tsd_tsdn(tsd), size); + } else { + size = sz_sa2u(size, alignment); + mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); + } if (mem == NULL) { return true; } @@ -925,8 +933,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); } - idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, - true); + if (tsd_tcache && cache_bin_stack_use_thp()) { + b0_dalloc_tcache_stack(tsd_tsdn(tsd), tcache_slow->dyn_alloc); + } else { + idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, + true, true); + } /* * The deallocation and tcache flush above may not trigger decay since diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 0a563c2f..6481504e 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -215,6 +215,29 @@ tcache_max2nhbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; } +static void +validate_tcache_stack(tcache_t *tcache) { + /* Assume bins[0] is enabled. */ + void *tcache_stack = tcache->bins[0].stack_head; + bool expect_found = cache_bin_stack_use_thp() ? true : false; + + /* Walk through all blocks to see if the stack is within range. 
 */ + base_t *base = b0get(); + base_block_t *next = base->blocks; + bool found = false; + do { + base_block_t *block = next; + if ((byte_t *)tcache_stack >= (byte_t *)block && + (byte_t *)tcache_stack < ((byte_t *)block + block->size)) { + found = true; + break; + } + next = block->next; + } while (next != NULL); + + expect_true(found == expect_found, "Unexpected tcache stack source"); +} + static void * tcache_check(void *arg) { size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; @@ -235,6 +258,7 @@ tcache_check(void *arg) { tcache_nhbins = tcache_nhbins_get(tcache); expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, "Unexpected default value for tcache_nhbins"); + validate_tcache_stack(tcache); /* * Close the tcache and test the set. @@ -280,6 +304,7 @@ tcache_check(void *arg) { "Unexpected mallctl() failure"); expect_zu_eq(old_tcache_max, min_tcache_max, "Unexpected value for tcache_max"); + validate_tcache_stack(tcache); /* * Check the thread's tcache_max and nhbins both through mallctl @@ -303,6 +328,7 @@ tcache_check(void *arg) { test_tcache_max_impl(new_tcache_max, alloc_option, dalloc_option); } + validate_tcache_stack(tcache); } return NULL; From 7a9e4c9073c9a06fa43130ecfd4790740327d415 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 2 Oct 2023 22:48:22 -0700 Subject: [PATCH 134/395] Mark jemalloc.h as system header to resolve header conflicts. --- include/jemalloc/jemalloc.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index b19b1548..dacd6195 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -5,6 +5,7 @@ objroot=$1 cat < Date: Wed, 27 Sep 2023 09:55:12 -0700 Subject: [PATCH 135/395] Fix comments about malloc_conf to enable logging. 
--- include/jemalloc/internal/log.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 921985c8..7b074abd 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -27,9 +27,9 @@ * log("extent.a", "log msg for extent.a"); // 5 * log("extent.b", "log msg for extent.b"); // 6 * - * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * And your malloc_conf option is "log:arena.a|extent", then lines 2, 4, 5, and * 6 will print at runtime. You can enable logging from all log vars by - * writing "log=.". + * writing "log:.". * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging From 36becb1302552c24b7bd59d8f00598e10a2411ea Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 10 Oct 2023 09:46:23 -0700 Subject: [PATCH 136/395] metadata usage breakdowns: tracking edata and rtree usages --- include/jemalloc/internal/arena_stats.h | 2 ++ include/jemalloc/internal/base.h | 6 +++- include/jemalloc/internal/ctl.h | 2 ++ src/arena.c | 8 +++-- src/base.c | 39 +++++++++++++++++++++---- src/ctl.c | 24 +++++++++++++++ src/rtree.c | 8 ++--- src/stats.c | 22 ++++++++++---- test/unit/base.c | 22 +++++++------- test/unit/stats.c | 13 ++++++++- 10 files changed, 116 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3407b023..3d512630 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -52,6 +52,8 @@ struct arena_stats_s { * in pa_shard_stats_t. */ size_t base; /* Derived. */ + size_t metadata_edata; /* Derived. */ + size_t metadata_rtree; /* Derived. */ size_t resident; /* Derived. */ size_t metadata_thp; /* Derived. */ size_t mapped; /* Derived. 
*/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 451be10f..86b0cf4a 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -78,6 +78,8 @@ struct base_s { /* Stats, only maintained if config_stats. */ size_t allocated; + size_t edata_allocated; + size_t rtree_allocated; size_t resident; size_t mapped; /* Number of THP regions touched. */ @@ -104,10 +106,12 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped, size_t *n_thp); + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, + size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1d3e6140..f38236f6 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -57,6 +57,8 @@ typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_edata; + size_t metadata_rtree; size_t metadata_thp; size_t resident; size_t mapped; diff --git a/src/arena.c b/src/arena.c index e7fa0971..d937c349 100644 --- a/src/arena.c +++ b/src/arena.c @@ -92,8 +92,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped, metadata_thp; - base_stats_get(tsdn, 
arena->base, &base_allocated, &base_resident, + size_t base_allocated, base_edata_allocated, base_rtree_allocated, + base_resident, base_mapped, metadata_thp; + base_stats_get(tsdn, arena->base, &base_allocated, + &base_edata_allocated, &base_rtree_allocated, &base_resident, &base_mapped, &metadata_thp); size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac); astats->mapped += base_mapped + pac_mapped_sz; @@ -102,6 +104,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); astats->base += base_allocated; + astats->metadata_edata += base_edata_allocated; + astats->metadata_rtree += base_rtree_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); astats->metadata_thp += metadata_thp; diff --git a/src/base.c b/src/base.c index e1dfe604..1d5e8fcd 100644 --- a/src/base.c +++ b/src/base.c @@ -430,6 +430,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, edata_avail_new(&base->edata_avail); if (config_stats) { + base->edata_allocated = 0; + base->rtree_allocated = 0; base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; @@ -482,7 +484,7 @@ base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { static void * base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, - size_t *esn) { + size_t *esn, size_t *ret_usize) { alignment = QUANTUM_CEILING(alignment); size_t usize = ALIGNMENT_CEILING(size, alignment); size_t asize = usize + alignment - QUANTUM; @@ -510,6 +512,9 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, if (esn != NULL) { *esn = (size_t)edata_sn_get(edata); } + if (ret_usize != NULL) { + *ret_usize = usize; + } label_return: malloc_mutex_unlock(tsdn, &base->mtx); return ret; @@ -525,21 +530,38 @@ label_return: */ void * base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { - return base_alloc_impl(tsdn, 
base, size, alignment, NULL); + return base_alloc_impl(tsdn, base, size, alignment, NULL, NULL); } edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { - size_t esn; + size_t esn, usize; edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), - EDATA_ALIGNMENT, &esn); + EDATA_ALIGNMENT, &esn, &usize); if (edata == NULL) { return NULL; } + if (config_stats) { + base->edata_allocated += usize; + } edata_esn_set(edata, esn); return edata; } +void * +base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { + size_t usize; + void *rtree = base_alloc_impl(tsdn, base, size, CACHELINE, NULL, + &usize); + if (rtree == NULL) { + return NULL; + } + if (config_stats) { + base->rtree_allocated += usize; + } + return rtree; +} + static inline void b0_alloc_header_size(size_t *header_size, size_t *alignment) { *alignment = QUANTUM; @@ -573,7 +595,8 @@ b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { b0_alloc_header_size(&header_size, &alignment); size_t alloc_size = sz_s2u(stack_size + header_size); - void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn); + void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn, + NULL); if (addr == NULL) { edata_avail_insert(&base->edata_avail, edata); return NULL; @@ -609,14 +632,18 @@ b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { } void -base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, +base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->edata_allocated + base->rtree_allocated <= base->allocated); *allocated = base->allocated; + *edata_allocated = base->edata_allocated; + *rtree_allocated = base->rtree_allocated; *resident = base->resident; *mapped = base->mapped; *n_thp = 
base->n_thp; diff --git a/src/ctl.c b/src/ctl.c index 5697539a..b0277c0a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -294,6 +294,8 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_edata) +CTL_PROTO(stats_arenas_i_metadata_rtree) CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) @@ -307,6 +309,8 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_edata) +CTL_PROTO(stats_metadata_rtree) CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) @@ -801,6 +805,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("tcache_stashed_bytes"), @@ -846,6 +852,8 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_edata"), CTL(stats_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -1138,6 +1146,10 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { sdstats->astats.base += astats->astats.base; + sdstats->astats.metadata_edata += astats->astats + .metadata_edata; + sdstats->astats.metadata_rtree += astats->astats + 
.metadata_rtree; sdstats->astats.resident += astats->astats.resident; sdstats->astats.metadata_thp += astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, @@ -1341,6 +1353,10 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_edata = ctl_sarena->astats->astats + .metadata_edata; + ctl_stats->metadata_rtree = ctl_sarena->astats->astats + .metadata_rtree; ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; @@ -3599,6 +3615,10 @@ label_return: CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_edata, ctl_stats->metadata_edata, + size_t) +CTL_RO_CGEN(config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, + size_t) CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) @@ -3664,6 +3684,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_edata, + arenas_i(mib[2])->astats->astats.metadata_edata, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_rtree, + arenas_i(mib[2])->astats->astats.metadata_rtree, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, diff --git a/src/rtree.c b/src/rtree.c index 6496b5af..b6ac04b7 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -29,14 +29,14 
@@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { static rtree_node_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc(tsdn, rtree->base, - nelms * sizeof(rtree_node_elm_t), CACHELINE); + return (rtree_node_elm_t *)base_alloc_rtree(tsdn, rtree->base, + nelms * sizeof(rtree_node_elm_t)); } static rtree_leaf_elm_t * rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc(tsdn, rtree->base, - nelms * sizeof(rtree_leaf_elm_t), CACHELINE); + return (rtree_leaf_elm_t *)base_alloc_rtree(tsdn, rtree->base, + nelms * sizeof(rtree_leaf_elm_t)); } static rtree_node_elm_t * diff --git a/src/stats.c b/src/stats.c index d80af226..c580b49e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1052,7 +1052,8 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident, metadata_thp, extent_avail; + size_t base, internal, resident, metadata_edata, metadata_rtree, + metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -1352,6 +1353,8 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(retained) GET_AND_EMIT_MEM_STAT(base) GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_edata) + GET_AND_EMIT_MEM_STAT(metadata_rtree) GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) GET_AND_EMIT_MEM_STAT(tcache_stashed_bytes) @@ -1696,8 +1699,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, * These should be deleted. We keep them around for a while, to aid in * the transition to the emitter code. 
*/ - size_t allocated, active, metadata, metadata_thp, resident, mapped, - retained; + size_t allocated, active, metadata, metadata_edata, metadata_rtree, + metadata_thp, resident, mapped, retained; size_t num_background_threads; size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; @@ -1705,6 +1708,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_edata", &metadata_edata, size_t); + CTL_GET("stats.metadata_rtree", &metadata_rtree, size_t); CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); @@ -1730,6 +1735,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); emitter_json_kv(emitter, "active", emitter_type_size, &active); emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_edata", emitter_type_size, + &metadata_edata); + emitter_json_kv(emitter, "metadata_rtree", emitter_type_size, + &metadata_rtree); emitter_json_kv(emitter, "metadata_thp", emitter_type_size, &metadata_thp); emitter_json_kv(emitter, "resident", emitter_type_size, &resident); @@ -1739,9 +1748,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, &zero_reallocs); emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " - "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " - "retained: %zu\n", allocated, active, metadata, metadata_thp, - resident, mapped, retained); + "metadata: %zu (n_thp %zu, edata %zu, rtree %zu), resident: %zu, " + "mapped: %zu, retained: %zu\n", allocated, active, metadata, + metadata_thp, metadata_edata, metadata_rtree, resident, mapped, + retained); /* Strange behaviors */ 
emitter_table_printf(emitter, diff --git a/test/unit/base.c b/test/unit/base.c index 15e04a8c..3e46626e 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -28,7 +28,8 @@ static extent_hooks_t hooks_not_null = { TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, + rtree_allocated, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, @@ -36,8 +37,8 @@ TEST_BEGIN(test_base_hooks_default) { /* metadata_use_hooks */ true); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated0, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { @@ -50,8 +51,8 @@ TEST_BEGIN(test_base_hooks_default) { "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated1, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -63,7 +64,8 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; base_t *base; - size_t allocated0, allocated1, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, + rtree_allocated, resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -79,8 +81,8 @@ TEST_BEGIN(test_base_hooks_null) { expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated0, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if 
(opt_metadata_thp == metadata_thp_always) { @@ -93,8 +95,8 @@ TEST_BEGIN(test_base_hooks_null) { "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated1, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } diff --git a/test/unit/stats.c b/test/unit/stats.c index bbdbd180..203a71b5 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -4,7 +4,8 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) TEST_BEGIN(test_stats_summary) { - size_t sz, allocated, active, resident, mapped; + size_t sz, allocated, active, resident, mapped, + metadata, metadata_edata, metadata_rtree; int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); @@ -17,6 +18,13 @@ TEST_BEGIN(test_stats_summary) { expect_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata", (void *)&metadata, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata_edata", (void *)&metadata_edata, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata_rtree", (void *)&metadata_rtree, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + if (config_stats) { expect_zu_le(allocated, active, "allocated should be no larger than active"); @@ -24,6 +32,9 @@ TEST_BEGIN(test_stats_summary) { "active should be less than resident"); expect_zu_lt(active, mapped, "active should be less than mapped"); + expect_zu_le(metadata_edata + metadata_rtree, metadata, + "the sum of metadata_edata and metadata_rtree " + "should be no larger than metadata"); } } TEST_END From 6b197fdd460be8bf3379da91d42e677dd5b5437a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 22 Aug 2023 16:31:54 -0700 Subject: [PATCH 137/395] Pre-generate ncached_max for 
all bins for better tcache_max tuning experience. --- include/jemalloc/internal/arena_inlines_b.h | 8 +- include/jemalloc/internal/cache_bin.h | 91 +++++--- include/jemalloc/internal/tcache_externs.h | 7 +- include/jemalloc/internal/tcache_inlines.h | 144 +++++------- include/jemalloc/internal/tcache_structs.h | 4 +- src/arena.c | 22 +- src/cache_bin.c | 20 +- src/ctl.c | 6 +- src/jemalloc.c | 3 +- src/tcache.c | 244 +++++++++++++------- test/unit/cache_bin.c | 132 ++++++----- test/unit/tcache_max.c | 44 ++-- 12 files changed, 417 insertions(+), 308 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c4d1c887..a4bacd8b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,7 +198,9 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(size <= tcache_max_get(tcache))) { + } else if (likely(ind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } @@ -298,7 +300,9 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); - if (szind < tcache_nhbins_get(tcache)) { + if (szind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(szind, &tcache->bins[szind], + tcache->tcache_slow)) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 78ac3295..2c831caf 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -23,16 +23,20 @@ */ typedef uint16_t 
cache_bin_sz_t; +#define JUNK_ADDR ((uintptr_t)0x7a7a7a7a7a7a7a7aULL) /* * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a * bug starts leaking those. Make it look like the junk pattern but be distinct * from it. */ -static const uintptr_t cache_bin_preceding_junk = - (uintptr_t)0x7a7a7a7a7a7a7a7aULL; -/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ -static const uintptr_t cache_bin_trailing_junk = - (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; +static const uintptr_t cache_bin_preceding_junk = JUNK_ADDR; +/* Note: JUNK_ADDR vs. JUNK_ADDR + 1 -- this tells you which pointer leaked. */ +static const uintptr_t cache_bin_trailing_junk = JUNK_ADDR + 1; +/* + * A pointer used to initialize a fake stack_head for disabled small bins + * so that the enabled/disabled assessment does not rely on ncached_max. + */ +extern const uintptr_t disabled_bin; /* * That implies the following value, for the maximum number of items in any @@ -174,9 +178,35 @@ cache_bin_nonfast_aligned(const void *ptr) { return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0; } +static inline const void * +cache_bin_disabled_bin_stack(void) { + return &disabled_bin; +} + +/* + * If a cache bin was zero initialized (either because it lives in static or + * thread-local storage, or was memset to 0), this function indicates whether or + * not cache_bin_init was called on it. + */ +static inline bool +cache_bin_still_zero_initialized(cache_bin_t *bin) { + return bin->stack_head == NULL; +} + +static inline bool +cache_bin_disabled(cache_bin_t *bin) { + bool disabled = (bin->stack_head == cache_bin_disabled_bin_stack()); + if (disabled) { + assert((uintptr_t)(*bin->stack_head) == JUNK_ADDR); + } + return disabled; +} + /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t -cache_bin_info_ncached_max(cache_bin_info_t *info) { +cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { + assert(!cache_bin_disabled(bin)); + assert(info == &bin->bin_info); return info->ncached_max; } @@ -234,7 +264,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin) { static inline cache_bin_sz_t cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= cache_bin_info_ncached_max_get(bin, info)); return n; } @@ -271,7 +301,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { static inline uint16_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { return (uint16_t)bin->low_bits_empty - - info->ncached_max * sizeof(void *); + cache_bin_info_ncached_max_get(bin, info) * sizeof(void *); } /* @@ -281,7 +311,7 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void ** cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -313,7 +343,7 @@ cache_bin_low_water_get_internal(cache_bin_t *bin) { static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); - assert(low_water <= cache_bin_info_ncached_max(info)); + assert(low_water <= cache_bin_info_ncached_max_get(bin, info)); assert(low_water <= cache_bin_ncached_get_local(bin, info)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, @@ -328,11 +358,13 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void cache_bin_low_water_set(cache_bin_t *bin) { + 
assert(!cache_bin_disabled(bin)); bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { + assert(!cache_bin_disabled(bin)); if (cache_bin_ncached_get_internal(bin) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); @@ -494,25 +526,26 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Get the number of stashed pointers. */ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); + if (config_debug && n != 0) { + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); - /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); - - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. 
*/ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned)); + assert(stashed != NULL && aligned); + } return n; } @@ -520,7 +553,7 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= cache_bin_info_ncached_max_get(bin, info)); return n; } @@ -541,8 +574,8 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * This function should not call other utility functions because the racy * condition may cause unexpected / undefined behaviors in unverified utility * functions. Currently, this function calls two utility functions - * cache_bin_info_ncached_max and cache_bin_low_bits_low_bound_get because they - * help access values that will not be concurrently modified. + * cache_bin_info_ncached_max_get and cache_bin_low_bits_low_bound_get because + * they help access values that will not be concurrently modified. */ static inline void cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, @@ -552,7 +585,8 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, (uint16_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); + assert(n <= ncached_max); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ @@ -560,7 +594,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, info); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= ncached_max); *nstashed = n; /* Note that cannot assert ncached + nstashed <= ncached_max (racy). 
*/ } @@ -697,13 +731,8 @@ void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void cache_bin_postincrement(void *alloc, size_t *cur_offset); void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); +void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); -/* - * If a cache bin was zero initialized (either because it lives in static or - * thread-local storage, or was memset to 0), this function indicates whether or - * not cache_bin_init was called on it. - */ -bool cache_bin_still_zero_initialized(cache_bin_t *bin); bool cache_bin_stack_use_thp(void); #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 87d243a1..8ca966d7 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -24,9 +24,9 @@ extern unsigned opt_lg_tcache_flush_large_div; * large-object bins. This is only used during threads initialization and * changing it will not reflect on initialized threads as expected. Thus, * it should not be changed on the fly. To change the number of tcache bins - * in use, refer to tcache_nhbins of each tcache. + * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_nhbins; +extern unsigned global_do_not_change_nbins; /* * Maximum cached size class. 
Same as above, this is only used during threads @@ -58,6 +58,7 @@ void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); +void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); @@ -70,8 +71,8 @@ void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); void tcache_flush(tsd_t *tsd); -bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena); bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); void tcache_assert_initialized(tcache_t *tcache); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 97501ee2..68481113 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -18,94 +18,72 @@ tcache_enabled_get(tsd_t *tsd) { return tsd_tcache_enabled_get(tsd); } -static inline void -tcache_enabled_set(tsd_t *tsd, bool enabled) { - bool was_enabled = tsd_tcache_enabled_get(tsd); - - if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL); - } else if (was_enabled && !enabled) { - tcache_cleanup(tsd); - } - /* Commit the state last. Above calls check current state. 
*/ - tsd_tcache_enabled_set(tsd, enabled); - tsd_slow_update(tsd); -} - static inline unsigned -tcache_nhbins_get(tcache_t *tcache) { - assert(tcache != NULL); - assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX); - return tcache->tcache_nhbins; +tcache_nbins_get(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); + unsigned nbins = tcache_slow->tcache_nbins; + assert(nbins <= TCACHE_NBINS_MAX); + return nbins; } static inline size_t -tcache_max_get(tcache_t *tcache) { - assert(tcache != NULL); - assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT); - return tcache->tcache_max; +tcache_max_get(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); + size_t tcache_max = sz_index2size(tcache_nbins_get(tcache_slow) - 1); + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + return tcache_max; } static inline void -tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) { - assert(tcache != NULL); +tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) { + assert(tcache_slow != NULL); assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); - tcache->tcache_max = tcache_max; - tcache->tcache_nhbins = sz_size2index(tcache_max) + 1; + tcache_slow->tcache_nbins = sz_size2index(tcache_max) + 1; } static inline void -thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) { - assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); - assert(tcache_max == sz_s2u(tcache_max)); - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_slow_t *tcache_slow; - assert(tcache != NULL); - - bool enabled = tcache_available(tsd); - arena_t *assigned_arena; - if (enabled) { - tcache_slow = tcache_slow_get(tsd); - assert(tcache != NULL && tcache_slow != NULL); - assigned_arena = tcache_slow->arena; - /* Shutdown and reboot the tcache for a clean slate. 
*/ - tcache_cleanup(tsd); +tcache_bin_settings_backup(tcache_t *tcache, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], + tcache->bins[i].bin_info.ncached_max); } +} + +JEMALLOC_ALWAYS_INLINE bool +tcache_bin_disabled(szind_t ind, cache_bin_t *bin, + tcache_slow_t *tcache_slow) { + assert(bin != NULL); + bool disabled = cache_bin_disabled(bin); /* - * Still set tcache_max and tcache_nhbins of the tcache even if - * the tcache is not available yet because the values are - * stored in tsd_t and are always available for changing. - */ - tcache_max_and_nhbins_set(tcache, tcache_max); - - if (enabled) { - tsd_tcache_data_init(tsd, assigned_arena); + * If a bin's ind >= nbins or ncached_max == 0, it must be disabled. + * However, when ind < nbins, it could be either enabled + * (ncached_max > 0) or disabled (ncached_max == 0). Similarly, when + * ncached_max > 0, it could be either enabled (ind < nbins) or + * disabled (ind >= nbins). Thus, if a bin is disabled, it has either + * ind >= nbins or ncached_max == 0. If a bin is enabled, it has + * ind < nbins and ncached_max > 0. + */ + unsigned nbins = tcache_nbins_get(tcache_slow); + cache_bin_sz_t ncached_max = bin->bin_info.ncached_max; + if (ind >= nbins) { + assert(disabled); + } else { + assert(!disabled || ncached_max == 0); + } + if (ncached_max == 0) { + assert(disabled); + } else { + assert(!disabled || ind >= nbins); + } + if (disabled) { + assert(ind >= nbins || ncached_max == 0); + } else { + assert(ind < nbins && ncached_max > 0); } - assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1); -} - -JEMALLOC_ALWAYS_INLINE bool -tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { - assert(ind < SC_NBINS); - assert(bin != NULL); - bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0; - if (ret) { - /* small size class but cache bin disabled. 
*/ - assert((uintptr_t)(*bin->stack_head) == - cache_bin_preceding_junk); - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE bool -tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) { - assert(ind >= SC_NBINS); - assert(bin != NULL); - return (cache_bin_info_ncached_max(&bin->bin_info) == 0 || - cache_bin_still_zero_initialized(bin)); + return disabled; } JEMALLOC_ALWAYS_INLINE void * @@ -124,7 +102,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } - if (unlikely(tcache_small_bin_disabled(binind, bin))) { + if (unlikely(tcache_bin_disabled(binind, bin, + tcache->tcache_slow))) { /* stats and zero are handled directly by the arena. */ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero, /* slab */ true); @@ -157,8 +136,9 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; bool tcache_success; - assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; + assert(binind >= SC_NBINS && + !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -180,7 +160,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } else { if (unlikely(zero)) { size_t usize = sz_index2size(binind); - assert(usize <= tcache_max_get(tcache)); + assert(usize <= tcache_max_get(tcache->tcache_slow)); memset(ret, 0, usize); } @@ -214,12 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - if (unlikely(tcache_small_bin_disabled(binind, bin))) { + if (unlikely(tcache_bin_disabled(binind, bin, + tcache->tcache_slow))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } - cache_bin_sz_t max = cache_bin_info_ncached_max( - &bin->bin_info); + cache_bin_sz_t max = cache_bin_info_ncached_max_get( + 
bin, &bin->bin_info); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -232,12 +213,13 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache)); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= + tcache_max_get(tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max( - &bin->bin_info) >> opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_info_ncached_max_get( + bin, &bin->bin_info) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index b51e10a7..d94099b0 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -31,6 +31,8 @@ struct tcache_slow_s { /* The arena this tcache is associated with. */ arena_t *arena; + /* The number of bins activated in the tcache. */ + unsigned tcache_nbins; /* Next bin to GC. */ szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). 
*/ @@ -55,8 +57,6 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - unsigned tcache_nhbins; - size_t tcache_max; cache_bin_t bins[TCACHE_NBINS_MAX]; }; diff --git a/src/arena.c b/src/arena.c index d937c349..4a383670 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,17 +163,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; + if (cache_bin_disabled(cache_bin)) { + continue; + } + cache_bin_sz_t ncached, nstashed; cache_bin_nitems_get_remote(cache_bin, &cache_bin->bin_info, &ncached, &nstashed); - - if ((i < SC_NBINS && - tcache_small_bin_disabled(i, cache_bin)) || - (i >= SC_NBINS && - tcache_large_bin_disabled(i, cache_bin))) { - assert(ncached == 0 && nstashed == 0); - } - astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * sz_index2size(i); @@ -730,11 +726,13 @@ arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, */ safety_check_verify_redzone(ptr, usize, bumped_usize); } + szind_t bumped_ind = sz_size2index(bumped_usize); if (bumped_usize >= SC_LARGE_MINCLASS && - tcache != NULL && - bumped_usize <= tcache_max_get(tcache)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(bumped_usize), slow_path); + tcache != NULL && bumped_ind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(bumped_ind, &tcache->bins[bumped_ind], + tcache->tcache_slow)) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, bumped_ind, + slow_path); } else { large_dalloc(tsdn, edata); } diff --git a/src/cache_bin.c b/src/cache_bin.c index 2ad2062d..67b6327b 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -5,10 +5,11 @@ #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/safety_check.h" +const uintptr_t disabled_bin = JUNK_ADDR; + void cache_bin_info_init(cache_bin_info_t *info, 
cache_bin_sz_t ncached_max) { - assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; @@ -37,7 +38,6 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { - assert(infos[i].ncached_max > 0); *size += infos[i].ncached_max * sizeof(void *); } @@ -98,13 +98,21 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); - assert(cache_bin_ncached_get_local(bin, info) == 0); + if (!cache_bin_disabled(bin)) { + assert(cache_bin_ncached_get_local(bin, &bin->bin_info) == 0); + } assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); } -bool -cache_bin_still_zero_initialized(cache_bin_t *bin) { - return bin->stack_head == NULL; +void +cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) { + const void *fake_stack = cache_bin_disabled_bin_stack(); + size_t fake_offset = 0; + cache_bin_info_t fake_info; + cache_bin_info_init(&fake_info, 0); + cache_bin_init(bin, &fake_info, (void *)fake_stack, &fake_offset); + cache_bin_info_init(&bin->bin_info, ncached_max); + assert(fake_offset == 0); } diff --git a/src/ctl.c b/src/ctl.c index b0277c0a..af22d0aa 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2317,7 +2317,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, /* pointer to tcache_t always exists even with tcache disabled. 
*/ tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache != NULL); - oldval = tcache_max_get(tcache); + oldval = tcache_max_get(tcache->tcache_slow); READ(oldval, size_t); if (newp != NULL) { @@ -2332,7 +2332,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, } new_tcache_max = sz_s2u(new_tcache_max); if(new_tcache_max != oldval) { - thread_tcache_max_and_nhbins_set(tsd, new_tcache_max); + thread_tcache_max_set(tsd, new_tcache_max); } } @@ -3155,7 +3155,7 @@ CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7aa6a1cd..4bf5cbff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4140,7 +4140,8 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); if (likely(tcache != NULL && - ind < tcache_nhbins_get(tcache)) && progress < batch) { + !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow)) && progress < batch) { if (bin == NULL) { bin = &tcache->bins[ind]; } diff --git a/src/tcache.c b/src/tcache.c index 2c0a7e2e..3070193c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -60,10 +60,10 @@ unsigned opt_lg_tcache_flush_large_div = 1; /* * Number of cache bins enabled, including both large and small. This value - * is only used to initialize tcache_nhbins in the per-thread tcache. + * is only used to initialize tcache_nbins in the per-thread tcache. 
* Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_nhbins; +unsigned global_do_not_change_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it @@ -129,6 +129,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, @@ -155,7 +156,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max(&cache_bin->bin_info) + if ((cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } @@ -167,6 +168,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* Like the small GC; flush 3/4 of untouched items. 
*/ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, @@ -187,8 +189,12 @@ tcache_event(tsd_t *tsd) { bool is_small = (szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); + if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { + goto label_done; + } + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, + is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &cache_bin->bin_info); if (low_water > 0) { @@ -210,8 +216,9 @@ tcache_event(tsd_t *tsd) { } cache_bin_low_water_set(cache_bin); +label_done: tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == tcache_nhbins_get(tcache)) { + if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { tcache_slow->next_gc_bin = 0; } } @@ -236,8 +243,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, void *ret; assert(tcache_slow->arena != NULL); - unsigned nfill = cache_bin_info_ncached_max(&cache_bin->bin_info) - >> tcache_slow->lg_fill_div[binind]; + assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); + unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, + &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, cache_bin, &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; @@ -321,7 +329,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (small) { assert(binind < SC_NBINS); } else { - assert(binind < tcache_nhbins_get(tcache)); + assert(binind < tcache_nbins_get(tcache_slow)); } arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -508,6 +516,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t 
*cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, @@ -551,6 +560,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { + assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. The content of original cached @@ -562,7 +572,8 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, info); cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); - assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); + assert(orig_cached + nstashed <= + cache_bin_info_ncached_max_get(cache_bin, info)); if (nstashed == 0) { return; } @@ -637,33 +648,11 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, } static void -tcache_max_and_nhbins_init(tcache_t *tcache) { - assert(tcache != NULL); +tcache_default_settings_init(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); assert(global_do_not_change_tcache_maxclass != 0); - assert(global_do_not_change_nhbins != 0); - tcache->tcache_max = global_do_not_change_tcache_maxclass; - tcache->tcache_nhbins = global_do_not_change_nhbins; - assert(tcache->tcache_nhbins == sz_size2index(tcache->tcache_max) + 1); -} - -bool -tsd_tcache_enabled_data_init(tsd_t *tsd) { - /* Called upon tsd initialization. */ - tsd_tcache_enabled_set(tsd, opt_tcache); - /* - * tcache is not available yet, but we need to set up its tcache_max - * and tcache_nhbins in advance. 
- */ - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_max_and_nhbins_init(tcache); - tsd_slow_update(tsd); - - if (opt_tcache) { - /* Trigger tcache init. */ - tsd_tcache_data_init(tsd, NULL); - } - - return false; + assert(global_do_not_change_nbins != 0); + tcache_slow->tcache_nbins = global_do_not_change_nbins; } static void @@ -679,19 +668,15 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* * We reserve cache bins for all small size classes, even if some may - * not get used (i.e. bins higher than tcache_nhbins). This allows + * not get used (i.e. bins higher than tcache_nbins). This allows * the fast and common paths to access cache bin metadata safely w/o * worrying about which ones are disabled. */ - unsigned tcache_nhbins = tcache_nhbins_get(tcache); - unsigned n_reserved_bins = tcache_nhbins < SC_NBINS ? SC_NBINS - : tcache_nhbins; - memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); - + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, tcache_nhbins, mem, + cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, &cur_offset); - for (unsigned i = 0; i < tcache_nhbins; i++) { + for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; @@ -699,40 +684,40 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, = tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; - cache_bin_init(cache_bin, &tcache_bin_info[i], mem, - &cur_offset); + if (tcache_bin_info[i].ncached_max > 0) { + cache_bin_init(cache_bin, &tcache_bin_info[i], mem, + &cur_offset); + } else { + cache_bin_init_disabled(cache_bin, + tcache_bin_info[i].ncached_max); + } } /* - * For small size classes beyond tcache_max(i.e. 
- * tcache_nhbins< NBINS), their cache bins are initialized to a state - * to safely and efficiently fail all fastpath alloc / free, so that - * no additional check around tcache_nhbins is needed on fastpath. + * Initialize all disabled bins to a state that can safely and + * efficiently fail all fastpath alloc / free, so that no additional + * check around tcache_nbins is needed on fastpath. Yet we still + * store the ncached_max in the bin_info for future usage. */ - for (unsigned i = tcache_nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. */ + for (unsigned i = tcache_nbins; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; - void *fake_stack = mem; - size_t fake_offset = 0; - - cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack, - &fake_offset); - assert(tcache_small_bin_disabled(i, cache_bin)); + cache_bin_init_disabled(cache_bin, + tcache_bin_info[i].ncached_max); + assert(tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)); } cache_bin_postincrement(mem, &cur_offset); if (config_debug) { /* Sanity check that the whole stack is used. 
*/ size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &size, &alignment); assert(cur_offset == size); } } static inline unsigned -tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { +tcache_ncached_max_compute(szind_t szind) { if (szind >= SC_NBINS) { - assert(szind < current_nhbins); return opt_tcache_nslots_large; } unsigned slab_nregs = bin_infos[szind].nregs; @@ -788,32 +773,28 @@ tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { } static void -tcache_bin_info_compute(cache_bin_info_t *tcache_bin_info, - unsigned tcache_nhbins) { - for (szind_t i = 0; i < tcache_nhbins; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i, - tcache_nhbins); +tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + /* + * Compute the values for each bin, but for bins with indices larger + * than tcache_nbins, no items will be cached. + */ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i); + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } - for (szind_t i = tcache_nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. */ - cache_bin_info_init(&tcache_bin_info[i], 0); - } } -/* Initialize auto tcache (embedded in TSD). */ -bool -tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { +static bool +tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, + cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); - unsigned tcache_nhbins = tcache_nhbins_get(tcache); + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); size_t size, alignment; - /* Takes 146B stack space. 
*/ - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; - tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &size, &alignment); void *mem; @@ -860,6 +841,23 @@ tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { return false; } +static bool +tsd_tcache_data_init_with_bin_settings(tsd_t *tsd, arena_t *arena, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + assert(tcache_bin_info != NULL); + return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); +} + +/* Initialize auto tcache (embedded in TSD). */ +static bool +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { + /* Takes 146B stack space. */ + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + tcache_bin_info_compute(tcache_bin_info); + + return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); +} + /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { @@ -868,11 +866,11 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. 
*/ - unsigned tcache_nhbins = global_do_not_change_nhbins; + unsigned tcache_nbins = global_do_not_change_nbins; size_t tcache_size, alignment; - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; - tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + tcache_bin_info_compute(tcache_bin_info); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &tcache_size, &alignment); size_t size = tcache_size + sizeof(tcache_t) @@ -889,7 +887,7 @@ tcache_create_explicit(tsd_t *tsd) { tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); - tcache_max_and_nhbins_init(tcache); + tcache_default_settings_init(tcache_slow); tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, @@ -898,13 +896,83 @@ tcache_create_explicit(tsd_t *tsd) { return tcache; } +bool +tsd_tcache_enabled_data_init(tsd_t *tsd) { + /* Called upon tsd initialization. */ + tsd_tcache_enabled_set(tsd, opt_tcache); + /* + * tcache is not available yet, but we need to set up its tcache_nbins + * in advance. + */ + tcache_default_settings_init(tsd_tcache_slowp_get(tsd)); + tsd_slow_update(tsd); + + if (opt_tcache) { + /* Trigger tcache init. */ + tsd_tcache_data_init(tsd, NULL); + } + + return false; +} + +void +tcache_enabled_set(tsd_t *tsd, bool enabled) { + bool was_enabled = tsd_tcache_enabled_get(tsd); + + if (!was_enabled && enabled) { + tsd_tcache_data_init(tsd, NULL); + } else if (was_enabled && !enabled) { + tcache_cleanup(tsd); + } + /* Commit the state last. Above calls check current state. 
*/ + tsd_tcache_enabled_set(tsd, enabled); + tsd_slow_update(tsd); +} + +void +thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + assert(tcache_max == sz_s2u(tcache_max)); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + assert(tcache != NULL && tcache_slow != NULL); + + bool enabled = tcache_available(tsd); + arena_t *assigned_arena; + if (enabled) { + assigned_arena = tcache_slow->arena; + /* Carry over the bin settings during the reboot. */ + tcache_bin_settings_backup(tcache, tcache_bin_info); + /* Shutdown and reboot the tcache for a clean slate. */ + tcache_cleanup(tsd); + } + + /* + * Still set tcache_nbins of the tcache even if the tcache is not + * available yet because the values are stored in tsd_t and are + * always available for changing. + */ + tcache_max_set(tcache_slow, tcache_max); + + if (enabled) { + tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tcache_bin_info); + } + + assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); +} + static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { + for (unsigned i = 0; i < tcache_nbins_get(tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache_slow)) { + continue; + } if (i < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); } else { @@ -974,8 +1042,7 @@ tcache_cleanup(tsd_t *tsd) { tcache_destroy(tsd, tcache, true); /* Make sure all bins used are reinitialized to the clean state. 
*/ - memset(tcache->bins, 0, sizeof(cache_bin_t) * - tcache_nhbins_get(tcache)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * TCACHE_NBINS_MAX); } void @@ -983,8 +1050,11 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. */ - for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { + for (unsigned i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { + continue; + } if (i < SC_NBINS) { bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); malloc_mutex_lock(tsdn, &bin->lock); @@ -1110,7 +1180,7 @@ bool tcache_boot(tsdn_t *tsdn, base_t *base) { global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - global_do_not_change_nhbins = + global_do_not_change_nbins = sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 50d51a6d..aed34585 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -106,11 +106,13 @@ TEST_BEGIN(test_cache_bin) { cache_bin_info_init(&info, ncached_max); cache_bin_t bin; test_bin_init(&bin, &info); + cache_bin_info_t *bin_info = &bin.bin_info; /* Initialize to empty; should then have 0 elements. 
*/ - expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); - expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); + expect_d_eq(ncached_max, cache_bin_info_ncached_max_get(&bin, + &bin.bin_info), ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); + expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); @@ -127,14 +129,14 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, &info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); - expect_true(cache_bin_low_water_get(&bin, &info) == 0, + expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -142,9 +144,9 @@ TEST_BEGIN(test_cache_bin) { cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -153,9 +155,9 @@ TEST_BEGIN(test_cache_bin) { ptr = 
cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i, ""); /* This should succeed, though. */ @@ -163,13 +165,13 @@ TEST_BEGIN(test_cache_bin) { expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -185,7 +187,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* @@ -202,60 +204,64 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get_local(&bin, &info)) { + while (cache_bin_ncached_get_local(&bin, bin_info)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } /* Test fill. */ /* Try to fill all, succeed fully. 
*/ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, ncached_max); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + ncached_max); /* Try to fill all, succeed partially. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, 0); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, 0); - do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max); - do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); - do_flush_test(&bin, &info, ptrs, ncached_max, 0); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, bin_info, ptrs, ncached_max, 0); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max * 2); - 
do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 1); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 0); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, + ncached_max * 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 1); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, 2, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, 2, 2); - do_batch_alloc_test(&bin, &info, ptrs, 2, 1); - do_batch_alloc_test(&bin, &info, ptrs, 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, 1, 2); - do_batch_alloc_test(&bin, &info, ptrs, 1, 1); - do_batch_alloc_test(&bin, &info, ptrs, 1, 0); - do_batch_alloc_test(&bin, &info, ptrs, 0, 2); - do_batch_alloc_test(&bin, &info, ptrs, 0, 1); - do_batch_alloc_test(&bin, &info, ptrs, 0, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 
ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 0); free(ptrs); } @@ -328,6 +334,7 @@ TEST_BEGIN(test_cache_bin_stash) { cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); + cache_bin_info_t *bin_info = &bin.bin_info; /* * The content of this array is not accessed; instead the interior @@ -337,10 +344,10 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, &info) == + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin, &info) == i / 2, - "Wrong nstashed value"); + expect_true(cache_bin_nstashed_get_local(&bin, bin_info) == + i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } else { @@ -362,18 +369,23 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin, &info) == - ncached_max / 2, "Wrong nstashed value"); + expect_true(cache_bin_nstashed_get_local(&bin, + bin_info) == ncached_max / 2, + "Wrong nstashed value"); } } test_bin_init(&bin, &info); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max, 0); - do_flush_stashed_test(&bin, &info, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_stashed_test(&bin, &info, ptrs, 
ncached_max / 4, ncached_max / 2); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 4); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, bin_info, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 2); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + ncached_max / 2); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 4); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + ncached_max / 4); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 6481504e..53752463 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -76,8 +76,11 @@ tcache_bytes_read_local(void) { size_t tcache_bytes = 0; tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tcache_get(tsd); - for (szind_t i = 0; i < tcache_nhbins_get(tcache); i++) { + for (szind_t i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { + continue; + } cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); tcache_bytes += ncached * sz_index2size(i); @@ -211,7 +214,7 @@ TEST_BEGIN(test_tcache_max) { TEST_END static size_t -tcache_max2nhbins(size_t tcache_max) { +tcache_max2nbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; } @@ -241,23 +244,24 @@ validate_tcache_stack(tcache_t *tcache) { static void * tcache_check(void *arg) { size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; - unsigned tcache_nhbins; + unsigned tcache_nbins; tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; sz = sizeof(size_t); new_tcache_max = *(size_t *)arg; min_tcache_max = 1; /* - * Check the default tcache_max and tcache_nhbins of each 
thread's + * Check the default tcache_max and tcache_nbins of each thread's * auto tcache. */ - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, opt_tcache_max, "Unexpected default value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, - "Unexpected default value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_nbins, + "Unexpected default value for tcache_nbins"); validate_tcache_stack(tcache); /* @@ -275,12 +279,12 @@ tcache_check(void *arg) { assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, (void *)&temp_tcache_max, sz),.0, "Unexpected.mallctl().failure"); - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, TCACHE_NBINS_MAX, - "Unexpected value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, TCACHE_NBINS_MAX, + "Unexpected value for tcache_nbins"); assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, (void *)&min_tcache_max, sz),.0, @@ -294,10 +298,10 @@ tcache_check(void *arg) { (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); min_tcache_max = sz_s2u(min_tcache_max); - expect_zu_eq(tcache_max_get(tcache), min_tcache_max, + expect_zu_eq(tcache_max_get(tcache_slow), min_tcache_max, "Unexpected value for tcache_max"); - expect_zu_eq(tcache_nhbins_get(tcache), - tcache_max2nhbins(min_tcache_max), "Unexpected value for nhbins"); + expect_zu_eq(tcache_nbins_get(tcache_slow), + tcache_max2nbins(min_tcache_max), "Unexpected value for nbins"); assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, 
(void *)&new_tcache_max, sz),.0, @@ -307,18 +311,18 @@ tcache_check(void *arg) { validate_tcache_stack(tcache); /* - * Check the thread's tcache_max and nhbins both through mallctl + * Check the thread's tcache_max and nbins both through mallctl * and alloc tests. */ if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { new_tcache_max = TCACHE_MAXCLASS_LIMIT; } - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, new_tcache_max, "Unexpected value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, tcache_max2nhbins(new_tcache_max), - "Unexpected value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), + "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { From 630f7de9520efeec096a604ce02bc7aef7b46a94 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 19 Sep 2023 14:37:09 -0700 Subject: [PATCH 138/395] Add mallctl to set and get ncached_max of each cache_bin. 1. `thread_tcache_ncached_max_read_sizeclass` allows users to get the ncached_max of the bin with the input sizeclass, passed in through oldp (will be upper casted if not an exact bin size is given). 2. `thread_tcache_ncached_max_write` takes in a char array representing the settings for bins in the tcache. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/cache_bin.h | 17 +- include/jemalloc/internal/ctl.h | 1 + .../internal/jemalloc_internal_macros.h | 2 + include/jemalloc/internal/tcache_externs.h | 5 +- include/jemalloc/internal/tcache_inlines.h | 7 +- include/jemalloc/internal/util.h | 8 + src/cache_bin.c | 3 +- src/ctl.c | 85 ++++++- src/jemalloc.c | 49 +--- src/tcache.c | 83 ++++++- src/util.c | 49 ++++ test/unit/tcache_max.c | 234 +++++++++++++++++- 14 files changed, 477 insertions(+), 70 deletions(-) create mode 100644 src/util.c diff --git a/Makefile.in b/Makefile.in index 3a02b3fd..594ea4f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -155,6 +155,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/thread_event.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ + $(srcroot)src/util.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a4bacd8b..f8928a01 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,7 +198,8 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(ind < TCACHE_NBINS_MAX && + } else if (likely( + ind < tcache_nbins_get(tcache->tcache_slow) && !tcache_bin_disabled(ind, &tcache->bins[ind], tcache->tcache_slow))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2c831caf..e2da3b90 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -210,6 +210,11 @@ cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { return info->ncached_max; } +/* Gets ncached_max without 
asserting that the bin is enabled. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get_unsafe(cache_bin_t *bin) { + return bin->bin_info.ncached_max; +} /* * Internal. * @@ -229,7 +234,7 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * Does difference calculations that handle wraparound correctly. Earlier must * be associated with the position earlier in memory. */ -static inline uint16_t +static inline cache_bin_sz_t cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { cache_bin_assert_earlier(bin, earlier, later); return later - earlier; @@ -584,19 +589,17 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t diff = bin->low_bits_empty - (uint16_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); - - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); - assert(n <= ncached_max); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); - - assert(n <= ncached_max); *nstashed = n; - /* Note that cannot assert ncached + nstashed <= ncached_max (racy). */ + /* + * Note that cannot assert anything regarding ncached_max because + * it can be configured on the fly and is thus racy. + */ } /* diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index f38236f6..1f124bfc 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -14,6 +14,7 @@ /* Maximum ctl tree depth. 
*/ #define CTL_MAX_DEPTH 7 +#define CTL_MULTI_SETTING_MAX_LEN 1000 typedef struct ctl_node_s { bool named; diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 9abcbb20..40df5feb 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,8 +37,10 @@ /* Various function pointers are static and immutable except during testing. */ #ifdef JEMALLOC_JET # define JET_MUTABLE +# define JET_EXTERN extern #else # define JET_MUTABLE const +# define JET_EXTERN static #endif #define JEMALLOC_VA_ARGS_HEAD(head, ...) head diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 8ca966d7..aa7ca00f 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,7 +26,7 @@ extern unsigned opt_lg_tcache_flush_large_div; * it should not be changed on the fly. To change the number of tcache bins * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_nbins; +extern unsigned global_do_not_change_tcache_nbins; /* * Maximum cached size class. 
Same as above, this is only used during threads @@ -55,6 +55,9 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); +bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, + cache_bin_sz_t *ncached_max); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 68481113..05599a5b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -46,7 +46,7 @@ tcache_bin_settings_backup(tcache_t *tcache, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], - tcache->bins[i].bin_info.ncached_max); + cache_bin_ncached_max_get_unsafe(&tcache->bins[i])); } } @@ -54,6 +54,7 @@ JEMALLOC_ALWAYS_INLINE bool tcache_bin_disabled(szind_t ind, cache_bin_t *bin, tcache_slow_t *tcache_slow) { assert(bin != NULL); + assert(ind < TCACHE_NBINS_MAX); bool disabled = cache_bin_disabled(bin); /* @@ -66,7 +67,7 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, * ind < nbins and ncached_max > 0. 
*/ unsigned nbins = tcache_nbins_get(tcache_slow); - cache_bin_sz_t ncached_max = bin->bin_info.ncached_max; + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get_unsafe(bin); if (ind >= nbins) { assert(disabled); } else { @@ -215,6 +216,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache->tcache_slow)); + assert(!tcache_bin_disabled(binind, &tcache->bins[binind], + tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 2c35ef76..f4035095 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -130,4 +130,12 @@ util_prefetch_write_range(void *ptr, size_t sz) { #undef UTIL_INLINE +/* + * Reads the settings in the following format: + * key1-key2:value|key3-key4:value|... + * Note it does not handle the ending '\0'. 
+ */ +bool +multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, + size_t *key_start, size_t *key_end, size_t *value); #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/cache_bin.c b/src/cache_bin.c index 67b6327b..24dabd0b 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -10,8 +10,9 @@ const uintptr_t disabled_bin = JUNK_ADDR; void cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); - assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); + assert(stack_size <= UINT16_MAX); info->ncached_max = (cache_bin_sz_t)ncached_max; } diff --git a/src/ctl.c b/src/ctl.c index af22d0aa..93144752 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -68,6 +68,8 @@ CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_max) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_tcache_ncached_max_write) +CTL_PROTO(thread_tcache_ncached_max_read_sizeclass) CTL_PROTO(thread_peak_read) CTL_PROTO(thread_peak_reset) CTL_PROTO(thread_prof_name) @@ -374,10 +376,17 @@ CTL_PROTO(stats_mutexes_reset) */ #define INDEX(i) {false}, i##_index +static const ctl_named_node_t thread_tcache_ncached_max_node[] = { + {NAME("read_sizeclass"), + CTL(thread_tcache_ncached_max_read_sizeclass)}, + {NAME("write"), CTL(thread_tcache_ncached_max_write)} +}; + static const ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("max"), CTL(thread_tcache_max)}, - {NAME("flush"), CTL(thread_tcache_flush)} + {NAME("flush"), CTL(thread_tcache_flush)}, + {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)} }; static const ctl_named_node_t thread_peak_node[] = { @@ -2282,6 +2291,78 @@ label_return: CTL_RO_NL_GEN(thread_allocated, tsd_thread_allocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) + +static int 
+thread_tcache_ncached_max_read_sizeclass_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + size_t bin_size = 0; + + /* Read the bin size from newp. */ + if (newp == NULL) { + ret = EINVAL; + goto label_return; + } + WRITE(bin_size, size_t); + + cache_bin_sz_t ncached_max = 0; + if (tcache_bin_ncached_max_read(tsd, bin_size, &ncached_max)) { + ret = EINVAL; + goto label_return; + } + size_t result = (size_t)ncached_max; + READ(result, size_t); + ret = 0; +label_return: + return ret; +} + +static int +thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + WRITEONLY(); + if (newp != NULL) { + if (!tcache_available(tsd)) { + ret = ENOENT; + goto label_return; + } + char *settings = NULL; + WRITE(settings, char *); + if (settings == NULL) { + ret = EINVAL; + goto label_return; + } + /* Get the length of the setting string safely. */ + char *end = (char *)memchr(settings, '\0', + CTL_MULTI_SETTING_MAX_LEN); + if (end == NULL) { + ret = EINVAL; + goto label_return; + } + /* + * Exclude the last '\0' for len since it is not handled by + * multi_setting_parse_next. 
+ */ + size_t len = (uintptr_t)end - (uintptr_t)settings; + if (len == 0) { + ret = 0; + goto label_return; + } + + if (tcache_bins_ncached_max_write(tsd, settings, len)) { + ret = EINVAL; + goto label_return; + } + } + + ret = 0; +label_return: + return ret; +} + CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) @@ -3155,7 +3236,7 @@ CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_tcache_nbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4bf5cbff..9c4e578e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -821,50 +821,6 @@ init_opt_stats_opts(const char *v, size_t vlen, char *dest) { assert(opts_len == strlen(dest)); } -/* Reads the next size pair in a multi-sized option. 
*/ -static bool -malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, - size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *new_size) { - const char *cur = *slab_size_segment_cur; - char *end; - uintmax_t um; - - set_errno(0); - - /* First number, then '-' */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0 || *end != '-') { - return true; - } - *slab_start = (size_t)um; - cur = end + 1; - - /* Second number, then ':' */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0 || *end != ':') { - return true; - } - *slab_end = (size_t)um; - cur = end + 1; - - /* Last number */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0) { - return true; - } - *new_size = (size_t)um; - - /* Consume the separator if there is one. */ - if (*end == '|') { - end++; - } - - *vlen_left -= end - *slab_size_segment_cur; - *slab_size_segment_cur = end; - - return false; -} - static void malloc_conf_format_error(const char *msg, const char *begin, const char *end) { size_t len = end - begin + 1; @@ -1351,7 +1307,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], size_t size_start; size_t size_end; size_t nshards; - bool err = malloc_conf_multi_sizes_next( + bool err = multi_setting_parse_next( &bin_shards_segment_cur, &vlen_left, &size_start, &size_end, &nshards); if (err || bin_update_shard_size( @@ -1613,7 +1569,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], size_t slab_start; size_t slab_end; size_t pgs; - err = malloc_conf_multi_sizes_next( + err = multi_setting_parse_next( &slab_size_segment_cur, &vlen_left, &slab_start, &slab_end, &pgs); @@ -4140,6 +4096,7 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); if (likely(tcache != NULL && + ind < tcache_nbins_get(tcache->tcache_slow) && !tcache_bin_disabled(ind, &tcache->bins[ind], 
tcache->tcache_slow)) && progress < batch) { if (bin == NULL) { diff --git a/src/tcache.c b/src/tcache.c index 3070193c..3fc2cae2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -63,7 +63,7 @@ unsigned opt_lg_tcache_flush_large_div = 1; * is only used to initialize tcache_nbins in the per-thread tcache. * Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_nbins; +unsigned global_do_not_change_tcache_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it @@ -193,8 +193,7 @@ tcache_event(tsd_t *tsd) { goto label_done; } - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, - is_small); + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &cache_bin->bin_info); if (low_water > 0) { @@ -591,6 +590,28 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(head_content == *cache_bin->stack_head); } +bool +tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, + cache_bin_sz_t *ncached_max) { + if (bin_size > TCACHE_MAXCLASS_LIMIT) { + return true; + } + + if (!tcache_available(tsd)) { + *ncached_max = 0; + return false; + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + szind_t bin_ind = sz_size2index(bin_size); + + cache_bin_t *bin = &tcache->bins[bin_ind]; + *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? 
+ 0: cache_bin_info_ncached_max_get(bin, &bin->bin_info); + return false; +} + void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena) { @@ -651,8 +672,8 @@ static void tcache_default_settings_init(tcache_slow_t *tcache_slow) { assert(tcache_slow != NULL); assert(global_do_not_change_tcache_maxclass != 0); - assert(global_do_not_change_nbins != 0); - tcache_slow->tcache_nbins = global_do_not_change_nbins; + assert(global_do_not_change_tcache_nbins != 0); + tcache_slow->tcache_nbins = global_do_not_change_tcache_nbins; } static void @@ -772,7 +793,7 @@ tcache_ncached_max_compute(szind_t szind) { } } -static void +JET_EXTERN void tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { /* * Compute the values for each bin, but for bins with indices larger @@ -866,7 +887,7 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. 
*/ - unsigned tcache_nbins = global_do_not_change_nbins; + unsigned tcache_nbins = global_do_not_change_tcache_nbins; size_t tcache_size, alignment; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; tcache_bin_info_compute(tcache_bin_info); @@ -963,6 +984,52 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } +bool +tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { + assert(tcache_available(tsd)); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + tcache_bin_settings_backup(tcache, tcache_bin_info); + const char *bin_settings_segment_cur = settings; + size_t len_left = len; + assert(len_left != 0); + + do { + size_t size_start, size_end; + size_t ncached_max; + bool err = multi_setting_parse_next(&bin_settings_segment_cur, + &len_left, &size_start, &size_end, &ncached_max); + if (err) { + return true; + } + if (size_end > TCACHE_MAXCLASS_LIMIT) { + size_end = TCACHE_MAXCLASS_LIMIT; + } + if (size_start > TCACHE_MAXCLASS_LIMIT || + size_start > size_end) { + continue; + } + /* May get called before sz_init (during malloc_conf_init). 
*/ + szind_t bin_start = sz_size2index_compute(size_start); + szind_t bin_end = sz_size2index_compute(size_end); + if (ncached_max > CACHE_BIN_NCACHED_MAX) { + ncached_max = (size_t)CACHE_BIN_NCACHED_MAX; + } + for (szind_t i = bin_start; i <= bin_end; i++) { + cache_bin_info_init(&tcache_bin_info[i], + (cache_bin_sz_t)ncached_max); + } + } while (len_left > 0); + + arena_t *assigned_arena = tcache->tcache_slow->arena; + tcache_cleanup(tsd); + tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tcache_bin_info); + + return false; +} + static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; @@ -1180,7 +1247,7 @@ bool tcache_boot(tsdn_t *tsdn, base_t *base) { global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - global_do_not_change_nbins = + global_do_not_change_tcache_nbins = sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/src/util.c b/src/util.c new file mode 100644 index 00000000..b73848fb --- /dev/null +++ b/src/util.c @@ -0,0 +1,49 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/util.h" + +/* Reads the next size pair in a multi-sized option. 
*/ +bool +multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, + size_t *key_start, size_t *key_end, size_t *value) { + const char *cur = *setting_segment_cur; + char *end; + uintmax_t um; + + set_errno(0); + + /* First number, then '-' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != '-') { + return true; + } + *key_start = (size_t)um; + cur = end + 1; + + /* Second number, then ':' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != ':') { + return true; + } + *key_end = (size_t)um; + cur = end + 1; + + /* Last number */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0) { + return true; + } + *value = (size_t)um; + + /* Consume the separator if there is one. */ + if (*end == '|') { + end++; + } + + *len_left -= end - *setting_segment_cur; + *setting_segment_cur = end; + + return false; +} + diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 53752463..5793cb6b 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -2,6 +2,8 @@ #include "test/san.h" const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; +extern void tcache_bin_info_compute( + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); enum { alloc_option_start = 0, @@ -260,7 +262,7 @@ tcache_check(void *arg) { expect_zu_eq(old_tcache_max, opt_tcache_max, "Unexpected default value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); - expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_nbins, + expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_tcache_nbins, "Unexpected default value for tcache_nbins"); validate_tcache_stack(tcache); @@ -364,10 +366,238 @@ TEST_BEGIN(test_thread_tcache_max) { } TEST_END +static void +check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + size_t mib_get[4], mib_get_len; + mib_get_len = sizeof(mib_get) / sizeof(size_t); + const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; + size_t ncached_max; + size_t sz = 
sizeof(size_t); + expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, + "Unexpected mallctlnametomib() failure"); + + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + size_t bin_size = sz_index2size(i); + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, + "Unexpected ncached_max for bin %d", i); + /* Check ncached_max returned under a non-bin size. */ + bin_size--; + size_t temp_ncached_max = 0; + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(temp_ncached_max, ncached_max, + "Unexpected ncached_max for inaccurate bin size."); + } +} + +static void * +ncached_max_check(void* args) { + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + + /* Check the initial bin settings. */ + tcache_bin_info_compute(tcache_bin_info); + memcpy(tcache_bin_info_backup, tcache_bin_info, + sizeof(tcache_bin_info)); + unsigned nbins = tcache_nbins_get(tcache_slow); + for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + size_t mib_set[4], mib_set_len; + mib_set_len = sizeof(mib_set) / sizeof(size_t); + const char *set_name = "thread.tcache.ncached_max.write"; + expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, + "Unexpected mallctlnametomib() failure"); + + /* Test the ncached_max set with tcache on. 
*/ + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char *inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + cache_bin_info_init(&tcache_bin_info[i], 1); + } + if (i == sz_size2index(160)) { + cache_bin_info_init(&tcache_bin_info[i], 11); + } + if (i >= sz_size2index(170) && i <= sz_size2index(320)) { + cache_bin_info_init(&tcache_bin_info[i], 22); + } + if (i >= sz_size2index(224)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + if (i >= nbins) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* + * Close the tcache and set ncached_max of some bins. It will be + * set properly but thread.tcache.ncached_max.read still returns 0 + * since the bin is not available yet. After enabling the tcache, + * the new setting will not be carried on. Instead, the default + * settings will be applied. + */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + strcpy(inputs, "0-112:8"); + /* Setting returns ENOENT when the tcache is disabled. */ + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), ENOENT, + "Unexpected mallctlbymib() failure"); + /* All ncached_max should return 0 once tcache is disabled. 
 */ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; + i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + /* + * Set ncached_max of bins not enabled yet. Then, enable them by + * resetting tcache_max. The ncached_max changes should stay. + */ + size_t tcache_max = 1024; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, + "Unexpected mallctl() failure"); + for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + strcpy(inputs, "2048-6144:123"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + tcache_max = 6144; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, + "Unexpected mallctl() failure"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { + if (i <= sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 123); + } else if (i > sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* Test an empty input, it should do nothing. 
*/ + strcpy(inputs, ""); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test a half-done string, it should return EINVAL and do nothing. */ + strcpy(inputs, "4-1024:7|256-1024"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test an invalid string with start size larger than end size. It + * should return success but do nothing. + */ + strcpy(inputs, "1024-256:7"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test a string exceeding the length limit, it should return EINVAL + * and do nothing. + */ + char *long_inputs = (char *)malloc(10000 * sizeof(char)); + expect_true(long_inputs != NULL, "Unexpected allocation failure."); + for (int i = 0; i < 200; i++) { + memcpy(long_inputs + i * 9, "4-1024:3|", 9); + } + memcpy(long_inputs + 200 * 9, "4-1024:3", 8); + long_inputs[200 * 9 + 8] = '\0'; + inputp = long_inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + free(long_inputs); + + /* + * Test a string with invalid characters, it should return EINVAL + * and do nothing. + */ + strcpy(inputs, "k8-1024:77p"); + inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test large ncached_max, it should return success but capped. 
*/ + strcpy(inputs, "1024-1024:65540"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], + CACHE_BIN_NCACHED_MAX); + check_bins_info(tcache_bin_info); + + return NULL; +} + +TEST_BEGIN(test_ncached_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(san_uaf_detection_enabled()); + unsigned nthreads = 8; + VARIABLE_ARRAY(thd_t, threads, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], ncached_max_check, NULL); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } +} +TEST_END + int main(void) { return test( test_tcache_max, - test_thread_tcache_max); + test_thread_tcache_max, + test_ncached_max); } From 867eedfc589039257deafe7492afa7aa9ab6169f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 16 Oct 2023 15:31:13 -0700 Subject: [PATCH 139/395] Fix the bug in dalloc promoted allocations. An allocation small enough will be promoted so that it does not share an extent with others. However, when dalloc, such allocations may not be dalloc as a promoted one if nbins < SC_NBINS. This commit fixes the bug. 
--- include/jemalloc/internal/arena_inlines_b.h | 29 +++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index f8928a01..a891b35c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -301,23 +301,24 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); - if (szind < TCACHE_NBINS_MAX && - !tcache_bin_disabled(szind, &tcache->bins[szind], - tcache->tcache_slow)) { - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); - } else { + bool is_sample_promoted = config_prof && szind < SC_NBINS; + if (unlikely(is_sample_promoted)) { + arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + if (szind < tcache_nbins_get(tcache->tcache_slow) && + !tcache_bin_disabled(szind, &tcache->bins[szind], + tcache->tcache_slow)) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); + } else { + edata_t *edata = emap_edata_lookup(tsdn, + &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, szind)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); } - } else { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { - /* See the comment in isfree. 
*/ - return; - } - large_dalloc(tsdn, edata); } } From 8a22d10b834cb66cce3e62dfc7606d8a491fe50b Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 11 Oct 2023 00:30:52 -0700 Subject: [PATCH 140/395] Allow setting default ncached_max for each bin through malloc_conf --- Makefile.in | 1 + include/jemalloc/internal/tcache_externs.h | 8 + src/jemalloc.c | 12 + src/tcache.c | 47 +++- test/unit/ncached_max.c | 264 +++++++++++++++++++++ test/unit/tcache_max.c | 230 +----------------- 6 files changed, 321 insertions(+), 241 deletions(-) create mode 100644 test/unit/ncached_max.c diff --git a/Makefile.in b/Makefile.in index 594ea4f2..df244adb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -242,6 +242,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/ncached_max.c \ $(srcroot)test/unit/oversize_threshold.c \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index aa7ca00f..973dbfe9 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -35,6 +35,11 @@ extern unsigned global_do_not_change_tcache_nbins; */ extern size_t global_do_not_change_tcache_maxclass; +/* Default bin info for each bin. */ +extern cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX]; +/* Records whether a bin's info is specified by malloc_conf. */ +extern bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX]; + /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are @@ -55,6 +60,9 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, + size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], + bool bin_info_is_set[TCACHE_NBINS_MAX]); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9c4e578e..c77f2ef2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1322,6 +1322,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + if (CONF_MATCH("tcache_ncached_max")) { + bool err = tcache_bin_info_settings_parse( + v, vlen, opt_tcache_ncached_max, + opt_tcache_ncached_max_set); + if (err) { + CONF_ERROR("Invalid settings for " + "tcache_ncached_max", k, klen, v, + vlen); + break; + } + CONF_CONTINUE; + } CONF_HANDLE_INT64_T(opt_mutex_max_spin, "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false); diff --git a/src/tcache.c b/src/tcache.c index 3fc2cae2..a8eaf296 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -71,6 +71,15 @@ unsigned global_do_not_change_tcache_nbins; */ size_t global_do_not_change_tcache_maxclass; +/* Default bin info for each bin. Will be initialized when thread starts. */ +cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; +/* + * Marks whether a bin's info is set already. This is used in + * tcache_bin_info_compute to avoid overwriting ncached_max specified by + * malloc_conf. + */ +bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; + tcaches_t *tcaches; /* Index of first element within tcaches that has never been used. 
*/ @@ -800,7 +809,9 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i); + unsigned ncached_max = opt_tcache_ncached_max_set[i] ? + opt_tcache_ncached_max[i].ncached_max: + tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } @@ -984,17 +995,9 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } -bool -tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { - assert(tcache_available(tsd)); - tcache_t *tcache = tsd_tcachep_get(tsd); - assert(tcache != NULL); - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; - tcache_bin_settings_backup(tcache, tcache_bin_info); - const char *bin_settings_segment_cur = settings; - size_t len_left = len; - assert(len_left != 0); - +bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, + size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], + bool bin_info_is_set[TCACHE_NBINS_MAX]) { do { size_t size_start, size_end; size_t ncached_max; @@ -1019,9 +1022,29 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { for (szind_t i = bin_start; i <= bin_end; i++) { cache_bin_info_init(&tcache_bin_info[i], (cache_bin_sz_t)ncached_max); + if (bin_info_is_set != NULL) { + bin_info_is_set[i] = true; + } } } while (len_left > 0); + return false; +} + +bool +tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { + assert(tcache_available(tsd)); + assert(len != 0); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + tcache_bin_settings_backup(tcache, tcache_bin_info); + + if(tcache_bin_info_settings_parse(settings, len, tcache_bin_info, + NULL)) { + 
return true; + } + arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c new file mode 100644 index 00000000..da35d7c9 --- /dev/null +++ b/test/unit/ncached_max.c @@ -0,0 +1,264 @@ +#include "test/jemalloc_test.h" +#include "test/san.h" + +const char *malloc_conf = +"tcache_ncached_max:256-1024:1001|2048-2048:0,tcache_max:4096"; +extern void tcache_bin_info_compute( + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); + +static void +check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + size_t mib_get[4], mib_get_len; + mib_get_len = sizeof(mib_get) / sizeof(size_t); + const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; + size_t ncached_max; + size_t sz = sizeof(size_t); + expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, + "Unexpected mallctlnametomib() failure"); + + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + size_t bin_size = sz_index2size(i); + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, + "Unexpected ncached_max for bin %d", i); + /* Check ncached_max returned under a non-bin size. 
*/ + bin_size--; + size_t temp_ncached_max = 0; + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(temp_ncached_max, ncached_max, + "Unexpected ncached_max for inaccurate bin size."); + } +} + +static void * +ncached_max_check(void* args) { + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + + + tcache_bin_info_compute(tcache_bin_info); + memcpy(tcache_bin_info_backup, tcache_bin_info, + sizeof(tcache_bin_info)); + /* Check ncached_max set by malloc_conf. */ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + bool first_range = (i >= sz_size2index(256) && + i <= sz_size2index(1024)); + bool second_range = (i == sz_size2index(2048)); + cache_bin_sz_t target_ncached_max = 0; + if (first_range || second_range) { + target_ncached_max = first_range ? 1001: 0; + expect_true(opt_tcache_ncached_max_set[i], + "Unexpected state for bin %u", i); + expect_zu_eq(target_ncached_max, + tcache_bin_info[i].ncached_max, + "Unexpected generated ncached_max for bin %u", i); + } else { + expect_false(opt_tcache_ncached_max_set[i], + "Unexpected state for bin %u", i); + } + expect_zu_eq(target_ncached_max, + opt_tcache_ncached_max[i].ncached_max, + "Unexpected pre-set ncached_max for bin %u", i); + } + unsigned nbins = tcache_nbins_get(tcache_slow); + for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + /* Check the initial bin settings. 
*/ + check_bins_info(tcache_bin_info); + + size_t mib_set[4], mib_set_len; + mib_set_len = sizeof(mib_set) / sizeof(size_t); + const char *set_name = "thread.tcache.ncached_max.write"; + expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, + "Unexpected mallctlnametomib() failure"); + + /* Test the ncached_max set with tcache on. */ + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char *inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + cache_bin_info_init(&tcache_bin_info[i], 1); + } + if (i == sz_size2index(160)) { + cache_bin_info_init(&tcache_bin_info[i], 11); + } + if (i >= sz_size2index(170) && i <= sz_size2index(320)) { + cache_bin_info_init(&tcache_bin_info[i], 22); + } + if (i >= sz_size2index(224)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + if (i >= nbins) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* + * Close the tcache and set ncached_max of some bins. It will be + * set properly but thread.tcache.ncached_max.read still returns 0 + * since the bin is not available yet. After enabling the tcache, + * the new setting will not be carried on. Instead, the default + * settings will be applied. + */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + strcpy(inputs, "0-112:8"); + /* Setting returns ENOENT when the tcache is disabled. */ + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), ENOENT, + "Unexpected mallctlbymib() failure"); + /* All ncached_max should return 0 once tcache is disabled. 
*/ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; + i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + /* + * Set ncached_max of bins not enabled yet. Then, enable them by + * resetting tcache_max. The ncached_max changes should stay. + */ + size_t tcache_max = 1024; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, + "Unexpected.mallctl().failure"); + for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + strcpy(inputs, "2048-6144:123"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + tcache_max = 6144; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, + "Unexpected.mallctl().failure"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { + if (i <= sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 123); + } else if (i > sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* Test an empty input, it should do nothing. 
*/ + strcpy(inputs, ""); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test a half-done string, it should return EINVAL and do nothing. */ + strcpy(inputs, "4-1024:7|256-1024"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test an invalid string with start size larger than end size. It + * should return success but do nothing. + */ + strcpy(inputs, "1024-256:7"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test a string exceeding the length limit, it should return EINVAL + * and do nothing. + */ + char *long_inputs = (char *)malloc(10000 * sizeof(char)); + expect_true(long_inputs != NULL, "Unexpected allocation failure."); + for (int i = 0; i < 200; i++) { + memcpy(long_inputs + i * 9, "4-1024:3|", 9); + } + memcpy(long_inputs + 200 * 9, "4-1024:3", 8); + long_inputs[200 * 9 + 8] = '\0'; + inputp = long_inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + free(long_inputs); + + /* + * Test a string with invalid characters, it should return EINVAL + * and do nothing. + */ + strcpy(inputs, "k8-1024:77p"); + inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test large ncached_max, it should return success but capped. 
*/ + strcpy(inputs, "1024-1024:65540"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], + CACHE_BIN_NCACHED_MAX); + check_bins_info(tcache_bin_info); + + return NULL; +} + +TEST_BEGIN(test_ncached_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(san_uaf_detection_enabled()); + /* TODO: change nthreads to 8 to reduce CI loads. */ + unsigned nthreads = 108; + VARIABLE_ARRAY(thd_t, threads, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], ncached_max_check, NULL); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } +} +TEST_END + +int +main(void) { + return test( + test_ncached_max); +} + diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 5793cb6b..32eacadf 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -366,238 +366,10 @@ TEST_BEGIN(test_thread_tcache_max) { } TEST_END -static void -check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { - size_t mib_get[4], mib_get_len; - mib_get_len = sizeof(mib_get) / sizeof(size_t); - const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; - size_t ncached_max; - size_t sz = sizeof(size_t); - expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, - "Unexpected mallctlnametomib() failure"); - - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - size_t bin_size = sz_index2size(i); - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); - expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, - "Unexpected ncached_max for bin %d", i); - /* Check ncached_max returned under a non-bin size. 
*/ - bin_size--; - size_t temp_ncached_max = 0; - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&temp_ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); - expect_zu_eq(temp_ncached_max, ncached_max, - "Unexpected ncached_max for inaccurate bin size."); - } -} - -static void * -ncached_max_check(void* args) { - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; - cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); - assert(tcache != NULL); - tcache_slow_t *tcache_slow = tcache->tcache_slow; - - /* Check the initial bin settings. */ - tcache_bin_info_compute(tcache_bin_info); - memcpy(tcache_bin_info_backup, tcache_bin_info, - sizeof(tcache_bin_info)); - unsigned nbins = tcache_nbins_get(tcache_slow); - for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - size_t mib_set[4], mib_set_len; - mib_set_len = sizeof(mib_set) / sizeof(size_t); - const char *set_name = "thread.tcache.ncached_max.write"; - expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, - "Unexpected mallctlnametomib() failure"); - - /* Test the ncached_max set with tcache on. 
*/ - char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; - char *inputp = inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { - cache_bin_info_init(&tcache_bin_info[i], 1); - } - if (i == sz_size2index(160)) { - cache_bin_info_init(&tcache_bin_info[i], 11); - } - if (i >= sz_size2index(170) && i <= sz_size2index(320)) { - cache_bin_info_init(&tcache_bin_info[i], 22); - } - if (i >= sz_size2index(224)) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - if (i >= nbins) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - } - check_bins_info(tcache_bin_info); - - /* - * Close the tcache and set ncached_max of some bins. It will be - * set properly but thread.tcache.ncached_max.read still returns 0 - * since the bin is not available yet. After enabling the tcache, - * the new setting will not be carried on. Instead, the default - * settings will be applied. - */ - bool e0 = false, e1; - size_t bool_sz = sizeof(bool); - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); - expect_true(e1, "Unexpected previous tcache state"); - strcpy(inputs, "0-112:8"); - /* Setting returns ENOENT when the tcache is disabled. */ - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), ENOENT, - "Unexpected mallctlbymib() failure"); - /* All ncached_max should return 0 once tcache is disabled. 
*/ - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - e0 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); - expect_false(e1, "Unexpected previous tcache state"); - memcpy(tcache_bin_info, tcache_bin_info_backup, - sizeof(tcache_bin_info_backup)); - for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; - i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - /* - * Set ncached_max of bins not enabled yet. Then, enable them by - * resetting tcache_max. The ncached_max changes should stay. - */ - size_t tcache_max = 1024; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, - "Unexpected.mallctl().failure"); - for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - strcpy(inputs, "2048-6144:123"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - tcache_max = 6144; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, - "Unexpected.mallctl().failure"); - memcpy(tcache_bin_info, tcache_bin_info_backup, - sizeof(tcache_bin_info_backup)); - for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { - if (i <= sz_size2index(6144)) { - cache_bin_info_init(&tcache_bin_info[i], 123); - } else if (i > sz_size2index(6144)) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - } - check_bins_info(tcache_bin_info); - - /* Test an empty input, it should do nothing. 
*/ - strcpy(inputs, ""); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* Test a half-done string, it should return EINVAL and do nothing. */ - strcpy(inputs, "4-1024:7|256-1024"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* - * Test an invalid string with start size larger than end size. It - * should return success but do nothing. - */ - strcpy(inputs, "1024-256:7"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* - * Test a string exceeding the length limit, it should return EINVAL - * and do nothing. - */ - char *long_inputs = (char *)malloc(10000 * sizeof(char)); - expect_true(long_inputs != NULL, "Unexpected allocation failure."); - for (int i = 0; i < 200; i++) { - memcpy(long_inputs + i * 9, "4-1024:3|", 9); - } - memcpy(long_inputs + 200 * 9, "4-1024:3", 8); - long_inputs[200 * 9 + 8] = '\0'; - inputp = long_inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - free(long_inputs); - - /* - * Test a string with invalid characters, it should return EINVAL - * and do nothing. - */ - strcpy(inputs, "k8-1024:77p"); - inputp = inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* Test large ncached_max, it should return success but capped. 
*/ - strcpy(inputs, "1024-1024:65540"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], - CACHE_BIN_NCACHED_MAX); - check_bins_info(tcache_bin_info); - - return NULL; -} - -TEST_BEGIN(test_ncached_max) { - test_skip_if(!config_stats); - test_skip_if(!opt_tcache); - test_skip_if(san_uaf_detection_enabled()); - unsigned nthreads = 8; - VARIABLE_ARRAY(thd_t, threads, nthreads); - for (unsigned i = 0; i < nthreads; i++) { - thd_create(&threads[i], ncached_max_check, NULL); - } - for (unsigned i = 0; i < nthreads; i++) { - thd_join(threads[i], NULL); - } -} -TEST_END - int main(void) { return test( test_tcache_max, - test_thread_tcache_max, - test_ncached_max); + test_thread_tcache_max); } From 6fb3b6a8e45d3e5f83b331ce8a1d41c5e5da3f4c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 17 Oct 2023 20:17:42 -0700 Subject: [PATCH 141/395] Refactor the tcache initiailization 1. Pre-generate all default tcache ncached_max in tcache_boot; 2. Add getters returning default ncached_max and ncached_max_set; 3. Refactor tcache init so that it is always init with a given setting. --- include/jemalloc/internal/cache_bin.h | 10 +-- include/jemalloc/internal/tcache_externs.h | 10 +-- src/cache_bin.c | 6 +- src/jemalloc.c | 6 +- src/tcache.c | 82 ++++++++++++++-------- test/unit/ncached_max.c | 20 +++--- test/unit/tcache_max.c | 2 - 7 files changed, 75 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index e2da3b90..67565835 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -719,8 +719,8 @@ void cache_bin_info_init(cache_bin_info_t *bin_info, * Given an array of initialized cache_bin_info_ts, determine how big an * allocation is required to initialize a full set of cache_bin_ts. 
*/ -void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, - size_t *size, size_t *alignment); +void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, + szind_t ninfos, size_t *size, size_t *alignment); /* * Actually initialize some cache bins. Callers should allocate the backing @@ -729,11 +729,11 @@ void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, * cache_bin_postincrement. *alloc_cur will then point immediately past the end * of the allocation. */ -void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, +void cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); void cache_bin_postincrement(void *alloc, size_t *cur_offset); -void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, - size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, + void *alloc, size_t *cur_offset); void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); bool cache_bin_stack_use_thp(void); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 973dbfe9..732adacb 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -35,11 +35,6 @@ extern unsigned global_do_not_change_tcache_nbins; */ extern size_t global_do_not_change_tcache_maxclass; -/* Default bin info for each bin. */ -extern cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX]; -/* Records whether a bin's info is specified by malloc_conf. */ -extern bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX]; - /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are @@ -60,9 +55,8 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, - size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], - bool bin_info_is_set[TCACHE_NBINS_MAX]); +bool tcache_bin_info_default_init(const char *bin_settings_segment_cur, + size_t len_left); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); diff --git a/src/cache_bin.c b/src/cache_bin.c index 24dabd0b..c3b94e54 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -28,7 +28,7 @@ cache_bin_stack_use_thp(void) { } void -cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, +cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, size_t *size, size_t *alignment) { /* For the total bin stack region (per tcache), reserve 2 more slots so * that @@ -51,7 +51,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, } void -cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, +cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { if (config_debug) { size_t computed_size; @@ -76,7 +76,7 @@ cache_bin_postincrement(void *alloc, size_t *cur_offset) { } void -cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, +cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, size_t *cur_offset) { /* * The full_position points to the lowest available space. 
Allocations diff --git a/src/jemalloc.c b/src/jemalloc.c index c77f2ef2..5da22a53 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1323,14 +1323,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } if (CONF_MATCH("tcache_ncached_max")) { - bool err = tcache_bin_info_settings_parse( - v, vlen, opt_tcache_ncached_max, - opt_tcache_ncached_max_set); + bool err = tcache_bin_info_default_init( + v, vlen); if (err) { CONF_ERROR("Invalid settings for " "tcache_ncached_max", k, klen, v, vlen); - break; } CONF_CONTINUE; } diff --git a/src/tcache.c b/src/tcache.c index a8eaf296..02627896 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -71,14 +71,17 @@ unsigned global_do_not_change_tcache_nbins; */ size_t global_do_not_change_tcache_maxclass; -/* Default bin info for each bin. Will be initialized when thread starts. */ -cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; +/* + * Default bin info for each bin. Will be initialized in malloc_conf_init + * and tcache_boot and should not be modified after that. + */ +static cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; /* * Marks whether a bin's info is set already. This is used in * tcache_bin_info_compute to avoid overwriting ncached_max specified by - * malloc_conf. + * malloc_conf. It should be set only when parsing malloc_conf. 
*/ -bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; +static bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; tcaches_t *tcaches; @@ -599,6 +602,16 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(head_content == *cache_bin->stack_head); } +JET_EXTERN bool +tcache_get_default_ncached_max_set(szind_t ind) { + return opt_tcache_ncached_max_set[ind]; +} + +JET_EXTERN const cache_bin_info_t * +tcache_get_default_ncached_max(void) { + return opt_tcache_ncached_max; +} + bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max) { @@ -687,7 +700,7 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem, cache_bin_info_t *tcache_bin_info) { + void *mem, const cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -809,8 +822,8 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = opt_tcache_ncached_max_set[i] ? - opt_tcache_ncached_max[i].ncached_max: + unsigned ncached_max = tcache_get_default_ncached_max_set(i) ? 
+ (unsigned)tcache_get_default_ncached_max()[i].ncached_max: tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); @@ -819,7 +832,7 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { static bool tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, - cache_bin_info_t *tcache_bin_info) { + const cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); @@ -873,20 +886,11 @@ tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, return false; } -static bool -tsd_tcache_data_init_with_bin_settings(tsd_t *tsd, arena_t *arena, - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { - assert(tcache_bin_info != NULL); - return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); -} - /* Initialize auto tcache (embedded in TSD). */ static bool -tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { - /* Takes 146B stack space. 
*/ - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; - tcache_bin_info_compute(tcache_bin_info); - +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena, + const cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + assert(tcache_bin_info != NULL); return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); } @@ -900,10 +904,8 @@ tcache_create_explicit(tsd_t *tsd) { */ unsigned tcache_nbins = global_do_not_change_tcache_nbins; size_t tcache_size, alignment; - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; - tcache_bin_info_compute(tcache_bin_info); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &tcache_size, &alignment); + cache_bin_info_compute_alloc(tcache_get_default_ncached_max(), + tcache_nbins, &tcache_size, &alignment); size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); @@ -920,7 +922,8 @@ tcache_create_explicit(tsd_t *tsd) { tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); tcache_default_settings_init(tcache_slow); - tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); + tcache_init(tsd, tcache_slow, tcache, mem, + tcache_get_default_ncached_max()); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); @@ -941,7 +944,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init(tsd, NULL); + tsd_tcache_data_init(tsd, NULL, + tcache_get_default_ncached_max()); } return false; @@ -952,7 +956,8 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL); + tsd_tcache_data_init(tsd, NULL, + tcache_get_default_ncached_max()); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -988,14 +993,14 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { tcache_max_set(tcache_slow, tcache_max); if (enabled) { - tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, - tcache_bin_info); + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } -bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, +static bool +tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], bool bin_info_is_set[TCACHE_NBINS_MAX]) { do { @@ -1031,6 +1036,14 @@ bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, return false; } +bool +tcache_bin_info_default_init(const char *bin_settings_segment_cur, + size_t len_left) { + return tcache_bin_info_settings_parse(bin_settings_segment_cur, + len_left, opt_tcache_ncached_max, opt_tcache_ncached_max_set); +} + + bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { assert(tcache_available(tsd)); @@ -1047,7 +1060,7 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); return false; @@ -1272,6 +1285,13 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); global_do_not_change_tcache_nbins = 
sz_size2index(global_do_not_change_tcache_maxclass) + 1; + /* + * Pre-compute default bin info and store the results in + * opt_tcache_ncached_max. After the changes here, + * opt_tcache_ncached_max should not be modified and should always be + * accessed using tcache_get_default_ncached_max. + */ + tcache_bin_info_compute(opt_tcache_ncached_max); if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c index da35d7c9..1a0d2885 100644 --- a/test/unit/ncached_max.c +++ b/test/unit/ncached_max.c @@ -2,9 +2,11 @@ #include "test/san.h" const char *malloc_conf = -"tcache_ncached_max:256-1024:1001|2048-2048:0,tcache_max:4096"; +"tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; extern void tcache_bin_info_compute( cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); +extern bool tcache_get_default_ncached_max_set(szind_t ind); +extern const cache_bin_info_t *tcache_get_default_ncached_max(void); static void check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { @@ -54,21 +56,23 @@ ncached_max_check(void* args) { bool first_range = (i >= sz_size2index(256) && i <= sz_size2index(1024)); bool second_range = (i == sz_size2index(2048)); + bool third_range = (i == sz_size2index(8192)); cache_bin_sz_t target_ncached_max = 0; - if (first_range || second_range) { - target_ncached_max = first_range ? 1001: 0; - expect_true(opt_tcache_ncached_max_set[i], + if (first_range || second_range || third_range) { + target_ncached_max = first_range ? 1001: + (second_range ? 
0: 1); + expect_true(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); expect_zu_eq(target_ncached_max, tcache_bin_info[i].ncached_max, "Unexpected generated ncached_max for bin %u", i); + expect_zu_eq(target_ncached_max, + tcache_get_default_ncached_max()[i].ncached_max, + "Unexpected pre-set ncached_max for bin %u", i); } else { - expect_false(opt_tcache_ncached_max_set[i], + expect_false(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); } - expect_zu_eq(target_ncached_max, - opt_tcache_ncached_max[i].ncached_max, - "Unexpected pre-set ncached_max for bin %u", i); } unsigned nbins = tcache_nbins_get(tcache_slow); for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 32eacadf..c740b5e7 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -2,8 +2,6 @@ #include "test/san.h" const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; -extern void tcache_bin_info_compute( - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); enum { alloc_option_start = 0, From d88fa71bbd8f22814ead264eff07ba70f05f3291 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Oct 2023 10:40:32 -0700 Subject: [PATCH 142/395] Fix nfill = 0 bug when ncached_max is 1 --- src/arena.c | 1 + src/tcache.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/arena.c b/src/arena.c index 4a383670..9a8e5d64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1023,6 +1023,7 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, const unsigned nfill) { assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); + assert(nfill != 0); const bin_info_t *bin_info = &bin_infos[binind]; diff --git a/src/tcache.c b/src/tcache.c index 02627896..d57574ca 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -257,6 +257,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, 
assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; + if (nfill == 0) { + nfill = 1; + } arena_cache_bin_fill_small(tsdn, arena, cache_bin, &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; From 04d1a87b78230931aa28cca72bef4424223a8d39 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 18 Oct 2023 12:13:35 -0700 Subject: [PATCH 143/395] Fix a zero-initializer warning on macOS. --- include/jemalloc/internal/tcache_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index a91b3252..578a199e 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -10,7 +10,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} -#define TCACHE_SLOW_ZERO_INITIALIZER {0} +#define TCACHE_SLOW_ZERO_INITIALIZER {{0}} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false From 756d4df2fd1b5bde025abed50c9b771376d72c6f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Oct 2023 16:44:36 -0700 Subject: [PATCH 144/395] Add util.c into vs project file. 
--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters | 3 +++ 8 files changed, 16 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ec028a1a..03c241ca 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a8004dbd..5d23d8e2 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 66ba849d..8eaab36b 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff 
--git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 7d9a1aa0..cd871379 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files From e2cd27132acfe04604352dbaa9d95b124f9ea50e Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 22 Oct 2023 22:23:13 -0700 Subject: [PATCH 145/395] Change stack_size assertion back to the more compatabile one. --- src/cache_bin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache_bin.c b/src/cache_bin.c index c3b94e54..0454dbde 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -12,7 +12,7 @@ cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); - assert(stack_size <= UINT16_MAX); + assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; } From 3025b021b9206478d2edcf017f1df7657d35e615 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 23 Oct 2023 13:00:10 -0700 Subject: [PATCH 146/395] Optimize mutex and bin alignment / locality. 
--- include/jemalloc/internal/arena_structs.h | 5 ++++- include/jemalloc/internal/mutex.h | 23 ++++++++++++----------- src/arena.c | 7 ++++++- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 6f79be97..803ed25c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -98,10 +98,13 @@ struct arena_s { /* * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. + * Enforcing cacheline-alignment to minimize the number of cachelines + * touched on the hot paths. */ JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " "Use `arena_get_bin` instead.") - bin_t all_bins[0]; + JEMALLOC_ALIGNED(CACHELINE) + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 46f22aec..75abf298 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -32,6 +32,12 @@ struct malloc_mutex_s { * unlocking thread). */ mutex_prof_data_t prof_data; + /* + * Hint flag to avoid exclusive cache line contention + * during spin waiting. Placed along with prof_data + * since it's always modified even with no contention. + */ + atomic_b_t locked; #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 SRWLOCK lock; @@ -46,11 +52,6 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - /* - * Hint flag to avoid exclusive cache line contention - * during spin waiting - */ - atomic_b_t locked; }; /* * We only touch witness when configured w/ debug. 
However we @@ -99,21 +100,21 @@ struct malloc_mutex_s { #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # if (defined(JEMALLOC_DEBUG)) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif @@ -121,11 +122,11 @@ struct malloc_mutex_s { # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif diff --git a/src/arena.c b/src/arena.c index 9a8e5d64..b4ead26a 100644 --- a/src/arena.c +++ 
b/src/arena.c @@ -1666,11 +1666,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } } - size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; + size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <= + (uintptr_t)arena + arena_size); + ) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); From e4817c8d89a2a413e835c4adeab5c5c4412f9235 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 24 Oct 2023 13:51:14 -0700 Subject: [PATCH 147/395] Cleanup cache_bin_info_t* info input args --- include/jemalloc/internal/arena_externs.h | 3 +- include/jemalloc/internal/cache_bin.h | 97 +++++----- include/jemalloc/internal/tcache_inlines.h | 7 +- src/arena.c | 13 +- src/cache_bin.c | 2 +- src/tcache.c | 56 +++--- test/unit/cache_bin.c | 200 ++++++++++----------- test/unit/tcache_max.c | 4 +- 8 files changed, 175 insertions(+), 207 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index d79b607a..7a29fd8b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,8 +63,7 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, - const unsigned nfill); + cache_bin_t *cache_bin, szind_t binind, const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/include/jemalloc/internal/cache_bin.h 
b/include/jemalloc/internal/cache_bin.h index 67565835..a26c3671 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -202,19 +202,19 @@ cache_bin_disabled(cache_bin_t *bin) { return disabled; } -/* Returns ncached_max: Upper limit on ncached. */ -static inline cache_bin_sz_t -cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { - assert(!cache_bin_disabled(bin)); - assert(info == &bin->bin_info); - return info->ncached_max; -} - /* Gets ncached_max without asserting that the bin is enabled. */ static inline cache_bin_sz_t cache_bin_ncached_max_get_unsafe(cache_bin_t *bin) { return bin->bin_info.ncached_max; } + +/* Returns ncached_max: Upper limit on ncached. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get(cache_bin_t *bin) { + assert(!cache_bin_disabled(bin)); + return cache_bin_ncached_max_get_unsafe(bin); +} + /* * Internal. * @@ -267,9 +267,9 @@ cache_bin_ncached_get_internal(cache_bin_t *bin) { * possible. */ static inline cache_bin_sz_t -cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_ncached_get_local(cache_bin_t *bin) { cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); - assert(n <= cache_bin_info_ncached_max_get(bin, info)); + assert(n <= cache_bin_ncached_max_get(bin)); return n; } @@ -304,9 +304,9 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * arena statistics collection. */ static inline uint16_t -cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { return (uint16_t)bin->low_bits_empty - - cache_bin_info_ncached_max_get(bin, info) * sizeof(void *); + cache_bin_ncached_max_get(bin) * sizeof(void *); } /* @@ -315,8 +315,8 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { * A pointer to the position with the lowest address of the backing array. 
*/ static inline void ** -cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); +cache_bin_low_bound_get(cache_bin_t *bin) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -328,8 +328,8 @@ cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { * batch fill a nonempty cache bin. */ static inline void -cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { - assert(cache_bin_ncached_get_local(bin, info) == 0); +cache_bin_assert_empty(cache_bin_t *bin) { + assert(cache_bin_ncached_get_local(bin) == 0); assert(cache_bin_empty_position_get(bin) == bin->stack_head); } @@ -346,10 +346,10 @@ cache_bin_low_water_get_internal(cache_bin_t *bin) { /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_water_get(cache_bin_t *bin) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); - assert(low_water <= cache_bin_info_ncached_max_get(bin, info)); - assert(low_water <= cache_bin_ncached_get_local(bin, info)); + assert(low_water <= cache_bin_ncached_max_get(bin)); + assert(low_water <= cache_bin_ncached_get_local(bin)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); @@ -530,17 +530,16 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Get the number of stashed pointers. 
*/ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, - info); +cache_bin_nstashed_get_internal(cache_bin_t *bin) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); if (config_debug && n != 0) { /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin); assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); @@ -556,9 +555,9 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); - assert(n <= cache_bin_info_ncached_max_get(bin, info)); +cache_bin_nstashed_get_local(cache_bin_t *bin) { + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin); + assert(n <= cache_bin_ncached_max_get(bin)); return n; } @@ -579,12 +578,12 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * This function should not call other utility functions because the racy * condition may cause unexpected / undefined behaviors in unverified utility * functions. Currently, this function calls two utility functions - * cache_bin_info_ncached_max_get and cache_bin_low_bits_low_bound_get because + * cache_bin_ncached_max_get and cache_bin_low_bits_low_bound_get because * they help access values that will not be concurrently modified. 
*/ static inline void -cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { +cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, + cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. */ cache_bin_sz_t diff = bin->low_bits_empty - (uint16_t)(uintptr_t)bin->stack_head; @@ -592,8 +591,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, - info); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* @@ -643,9 +641,9 @@ struct cache_bin_ptr_array_s { * finish_fill call before doing any alloc/dalloc operations on the bin. */ static inline void -cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - cache_bin_assert_empty(bin, info); +cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nfill) { + cache_bin_assert_empty(bin); arr->ptr = cache_bin_empty_position_get(bin) - nfill; } @@ -655,9 +653,9 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, * case of OOM. */ static inline void -cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { - cache_bin_assert_empty(bin, info); +cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nfilled) { + cache_bin_assert_empty(bin); void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { memmove(empty_position - nfilled, empty_position - arr->n, @@ -671,17 +669,17 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, * everything we give them. 
*/ static inline void -cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, +cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - nflush; - assert(cache_bin_ncached_get_local(bin, info) == 0 + assert(cache_bin_ncached_get_local(bin) == 0 || *arr->ptr != NULL); } static inline void -cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { - unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; +cache_bin_finish_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nflushed) { + unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; @@ -690,23 +688,22 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, - cache_bin_info_t *info, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nstashed) { + cache_bin_ptr_array_t *arr, cache_bin_sz_t nstashed) { assert(nstashed > 0); - assert(cache_bin_nstashed_get_local(bin, info) == nstashed); + assert(cache_bin_nstashed_get_local(bin) == nstashed); - void **low_bound = cache_bin_low_bound_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin); arr->ptr = low_bound; assert(*arr->ptr != NULL); } static inline void -cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { - void **low_bound = cache_bin_low_bound_get(bin, info); +cache_bin_finish_flush_stashed(cache_bin_t *bin) { + void **low_bound = cache_bin_low_bound_get(bin); /* Reset the bin local full position. 
*/ bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; - assert(cache_bin_nstashed_get_local(bin, info) == 0); + assert(cache_bin_nstashed_get_local(bin) == 0); } /* diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 05599a5b..e8e3b41f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -200,8 +200,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } - cache_bin_sz_t max = cache_bin_info_ncached_max_get( - bin, &bin->bin_info); + cache_bin_sz_t max = cache_bin_ncached_max_get(bin); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -221,8 +220,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max_get( - bin, &bin->bin_info) >> opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_ncached_max_get(bin) >> + opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/arena.c b/src/arena.c index b4ead26a..4e923015 100644 --- a/src/arena.c +++ b/src/arena.c @@ -168,8 +168,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } cache_bin_sz_t ncached, nstashed; - cache_bin_nitems_get_remote(cache_bin, - &cache_bin->bin_info, &ncached, &nstashed); + cache_bin_nitems_get_remote(cache_bin, &ncached, &nstashed); astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * sz_index2size(i); @@ -1020,16 +1019,14 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t 
*cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, - const unsigned nfill) { - assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); + cache_bin_t *cache_bin, szind_t binind, const unsigned nfill) { + assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill != 0); const bin_info_t *bin_info = &bin_infos[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs, - nfill); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. After both are exhausted, new slabs will be allocated through @@ -1143,7 +1140,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled); + cache_bin_finish_fill(cache_bin, &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/cache_bin.c b/src/cache_bin.c index 0454dbde..6438705f 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -100,7 +100,7 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { - assert(cache_bin_ncached_get_local(bin, &bin->bin_info) == 0); + assert(cache_bin_ncached_get_local(bin) == 0); } assert(cache_bin_empty_position_get(bin) == empty_position); diff --git a/src/tcache.c b/src/tcache.c index d57574ca..015bdb11 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,10 +142,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t 
low_water = cache_bin_low_water_get(cache_bin); assert(!tcache_slow->bin_refilled[szind]); size_t nflush = low_water - (low_water >> 2); @@ -168,8 +166,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) - >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { + if ((cache_bin_ncached_max_get(cache_bin) >> + (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } } @@ -181,10 +179,8 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, (unsigned)(ncached - low_water + (low_water >> 2))); } @@ -206,8 +202,7 @@ tcache_event(tsd_t *tsd) { } tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); if (low_water > 0) { if (is_small) { tcache_gc_small(tsd, tcache_slow, tcache, szind); @@ -255,13 +250,12 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); - unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, - &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; + unsigned nfill = cache_bin_ncached_max_get(cache_bin) + >> tcache_slow->lg_fill_div[binind]; if 
(nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, - &cache_bin->bin_info, binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); @@ -533,20 +527,17 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); assert((cache_bin_sz_t)rem <= ncached); unsigned nflush = ncached - rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(cache_bin, &cache_bin->bin_info, - &ptrs, nflush); + cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &cache_bin->bin_info, &ptrs, - ncached - rem); + cache_bin_finish_flush(cache_bin, &ptrs, ncached - rem); } void @@ -575,33 +566,30 @@ void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); - cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. The content of original cached * items remain unchanged -- the stashed items reside on the other end * of the stack. Checking the stack head and ncached to verify. 
*/ void *head_content = *cache_bin->stack_head; - cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin, - info); + cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin); - cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); - assert(orig_cached + nstashed <= - cache_bin_info_ncached_max_get(cache_bin, info)); + cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin); + assert(orig_cached + nstashed <= cache_bin_ncached_max_get(cache_bin)); if (nstashed == 0) { return; } CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); - cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs, + cache_bin_init_ptr_array_for_stashed(cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); - cache_bin_finish_flush_stashed(cache_bin, info); + cache_bin_finish_flush_stashed(cache_bin); - assert(cache_bin_nstashed_get_local(cache_bin, info) == 0); - assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); + assert(cache_bin_nstashed_get_local(cache_bin) == 0); + assert(cache_bin_ncached_get_local(cache_bin) == orig_cached); assert(head_content == *cache_bin->stack_head); } @@ -633,7 +621,7 @@ tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_t *bin = &tcache->bins[bin_ind]; *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? 
- 0: cache_bin_info_ncached_max_get(bin, &bin->bin_info); + 0: cache_bin_ncached_max_get(bin); return false; } @@ -1105,7 +1093,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; - cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); + cache_bin_assert_empty(cache_bin); } if (tsd_tcache && cache_bin_stack_use_thp()) { b0_dalloc_tcache_stack(tsd_tsdn(tsd), tcache_slow->dyn_alloc); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index aed34585..1bb750d7 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -1,19 +1,18 @@ #include "test/jemalloc_test.h" static void -do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, - cache_bin_sz_t nfill_succeed) { +do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, + cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) { bool success; void *ptr; - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); - cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); + cache_bin_init_ptr_array_for_fill(bin, &arr, nfill_attempt); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { arr.ptr[i] = &ptrs[i]; } - cache_bin_finish_fill(bin, info, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get_local(bin, info) == nfill_succeed, + cache_bin_finish_fill(bin, &arr, nfill_succeed); + expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, ""); cache_bin_low_water_set(bin); @@ -22,18 +21,18 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(success, ""); expect_ptr_eq(ptr, (void *)&ptrs[i], "Should pop in order filled"); - expect_true(cache_bin_low_water_get(bin, info) + expect_true(cache_bin_low_water_get(bin) == nfill_succeed - i - 1, ""); } - 
expect_true(cache_bin_ncached_get_local(bin, info) == 0, ""); - expect_true(cache_bin_low_water_get(bin, info) == 0, ""); + expect_true(cache_bin_ncached_get_local(bin) == 0, ""); + expect_true(cache_bin_low_water_get(bin) == 0, ""); } static void -do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, cache_bin_sz_t nflush) { +do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + cache_bin_sz_t nflush) { bool success; - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); for (cache_bin_sz_t i = 0; i < nfill; i++) { success = cache_bin_dalloc_easy(bin, &ptrs[i]); @@ -41,30 +40,30 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); - cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); + cache_bin_init_ptr_array_for_flush(bin, &arr, nflush); for (cache_bin_sz_t i = 0; i < nflush; i++) { expect_ptr_eq(arr.ptr[i], &ptrs[nflush - i - 1], ""); } - cache_bin_finish_flush(bin, info, &arr, nflush); + cache_bin_finish_flush(bin, &arr, nflush); - expect_true(cache_bin_ncached_get_local(bin, info) == nfill - nflush, + expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, ""); - while (cache_bin_ncached_get_local(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin) > 0) { cache_bin_alloc(bin, &success); } } static void -do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, size_t batch) { - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); +do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + size_t batch) { + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); - cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); + cache_bin_init_ptr_array_for_fill(bin, &arr, nfill); for (cache_bin_sz_t i = 0; i < nfill; i++) { arr.ptr[i] = &ptrs[i]; } - 
cache_bin_finish_fill(bin, info, &arr, nfill); - assert_true(cache_bin_ncached_get_local(bin, info) == nfill, ""); + cache_bin_finish_fill(bin, &arr, nfill); + assert_true(cache_bin_ncached_get_local(bin) == nfill, ""); cache_bin_low_water_set(bin); void **out = malloc((batch + 1) * sizeof(void *)); @@ -73,9 +72,9 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { expect_ptr_eq(out[i], &ptrs[i], ""); } - expect_true(cache_bin_low_water_get(bin, info) == nfill - + expect_true(cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, ""); - while (cache_bin_ncached_get_local(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin) > 0) { bool success; cache_bin_alloc(bin, &success); } @@ -106,13 +105,11 @@ TEST_BEGIN(test_cache_bin) { cache_bin_info_init(&info, ncached_max); cache_bin_t bin; test_bin_init(&bin, &info); - cache_bin_info_t *bin_info = &bin.bin_info; /* Initialize to empty; should then have 0 elements. 
*/ - expect_d_eq(ncached_max, cache_bin_info_ncached_max_get(&bin, - &bin.bin_info), ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); - expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, ""); + expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), ""); + expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); + expect_true(cache_bin_low_water_get(&bin) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); @@ -129,14 +126,14 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); - expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, + expect_true(cache_bin_low_water_get(&bin) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -144,9 +141,9 @@ TEST_BEGIN(test_cache_bin) { cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -155,9 +152,9 @@ TEST_BEGIN(test_cache_bin) { ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); 
expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* This should succeed, though. */ @@ -165,13 +162,13 @@ TEST_BEGIN(test_cache_bin) { expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -187,7 +184,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* @@ -204,77 +201,72 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get_local(&bin, bin_info)) { + while (cache_bin_ncached_get_local(&bin)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } /* Test fill. */ /* Try to fill all, succeed fully. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max); /* Try to fill all, succeed partially. 
*/ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, 0); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, 0); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, 0); - do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max); - do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); - do_flush_test(&bin, bin_info, ptrs, ncached_max, 0); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, 0); + do_flush_test(&bin, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, ptrs, ncached_max, 0); + do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, - ncached_max * 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, - ncached_max / 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 0); - 
do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max / 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max / 4); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 0); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max * 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max / 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, ptrs, 2, ncached_max); + do_batch_alloc_test(&bin, ptrs, 2, 2); + do_batch_alloc_test(&bin, ptrs, 2, 1); + do_batch_alloc_test(&bin, ptrs, 2, 0); + do_batch_alloc_test(&bin, ptrs, 1, 2); + do_batch_alloc_test(&bin, ptrs, 1, 1); + do_batch_alloc_test(&bin, ptrs, 1, 0); + do_batch_alloc_test(&bin, ptrs, 0, 
2); + do_batch_alloc_test(&bin, ptrs, 0, 1); + do_batch_alloc_test(&bin, ptrs, 0, 0); free(ptrs); } TEST_END static void -do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, cache_bin_sz_t nstash) { - expect_true(cache_bin_ncached_get_local(bin, info) == 0, +do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + cache_bin_sz_t nstash) { + expect_true(cache_bin_ncached_get_local(bin) == 0, "Bin not empty"); - expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Bin not empty"); - expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); + expect_true(nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); bool ret; /* Fill */ @@ -282,7 +274,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_dalloc_easy(bin, &ptrs[i]); expect_true(ret, "Unexpected fill failure"); } - expect_true(cache_bin_ncached_get_local(bin, info) == nfill, + expect_true(cache_bin_ncached_get_local(bin) == nfill, "Wrong cached count"); /* Stash */ @@ -290,10 +282,10 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); - if (nfill + nstash == info->ncached_max) { + if (nfill + nstash == bin->bin_info.ncached_max) { ret = cache_bin_dalloc_easy(bin, &ptrs[0]); expect_false(ret, "Should not dalloc into a full bin"); ret = cache_bin_stash(bin, &ptrs[0]); @@ -308,19 +300,19 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], "Should not alloc stashed ptrs"); } - expect_true(cache_bin_ncached_get_local(bin, info) == 0, + expect_true(cache_bin_ncached_get_local(bin) == 0, 
"Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc stashed"); /* Clear stashed ones */ - cache_bin_finish_flush_stashed(bin, info); - expect_true(cache_bin_ncached_get_local(bin, info) == 0, + cache_bin_finish_flush_stashed(bin); + expect_true(cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -334,7 +326,6 @@ TEST_BEGIN(test_cache_bin_stash) { cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); - cache_bin_info_t *bin_info = &bin.bin_info; /* * The content of this array is not accessed; instead the interior @@ -344,9 +335,9 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == + expect_true(cache_bin_ncached_get_local(&bin) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin, bin_info) == + expect_true(cache_bin_nstashed_get_local(&bin) == i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); @@ -369,22 +360,21 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin, - bin_info) == ncached_max / 2, + expect_true(cache_bin_nstashed_get_local(&bin) == ncached_max / 2, "Wrong nstashed value"); } } test_bin_init(&bin, &info); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max, 0); - do_flush_stashed_test(&bin, bin_info, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, bin_info, ptrs, 
ncached_max / 2, + do_flush_stashed_test(&bin, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 2); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 4); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index c740b5e7..a64fca71 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -81,8 +81,7 @@ tcache_bytes_read_local(void) { if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { continue; } - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); tcache_bytes += ncached * sz_index2size(i); } return tcache_bytes; @@ -370,4 +369,3 @@ main(void) { test_tcache_max, test_thread_tcache_max); } - From eda05b39941c0ff6d5236c845e6bca70324c9a32 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 13 Dec 2023 15:21:09 -0800 Subject: [PATCH 148/395] Fix static analysis warnings. 
--- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 4 ++-- src/extent.c | 5 +++-- src/jemalloc.c | 8 ++++++-- src/pac.c | 9 +++++++-- src/prof.c | 3 ++- src/prof_data.c | 4 ++++ src/sz.c | 3 ++- src/tcache.c | 10 ++++++---- 9 files changed, 33 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 7a29fd8b..f91bd888 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,7 +63,7 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const unsigned nfill); + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/src/arena.c b/src/arena.c index 4e923015..746ab328 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1019,7 +1019,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const unsigned nfill) { + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill) { assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill != 0); @@ -1056,7 +1056,7 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, bool made_progress = true; edata_t *fresh_slab = NULL; bool alloc_and_retry = false; - unsigned filled = 0; + cache_bin_sz_t filled = 0; unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); diff --git a/src/extent.c b/src/extent.c index 822c6eee..2efc7938 100644 --- a/src/extent.c +++ b/src/extent.c @@ -201,8 +201,6 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * concurrent operations. 
*/ switch (ecache->state) { - case extent_state_active: - not_reached(); case extent_state_dirty: case extent_state_muzzy: emap_update_edata_state(tsdn, pac->emap, edata, @@ -211,6 +209,9 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, case extent_state_retained: extent_deregister(tsdn, pac, edata); break; + case extent_state_active: + case extent_state_transition: + case extent_state_merging: default: not_reached(); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 5da22a53..8fba8878 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -460,8 +460,12 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { tsd_iarena_set(tsd, arena); } else { tsd_arena_set(tsd, arena); - unsigned shard = atomic_fetch_add_u(&arena->binshard_next, 1, - ATOMIC_RELAXED); + /* + * While shard acts as a random seed, the cast below should + * not make much difference. + */ + uint8_t shard = (uint8_t)atomic_fetch_add_u( + &arena->binshard_next, 1, ATOMIC_RELAXED); tsd_binshards_t *bins = tsd_binshardsp_get(tsd); for (unsigned i = 0; i < SC_NBINS; i++) { assert(bin_infos[i].n_shards > 0 && diff --git a/src/pac.c b/src/pac.c index 53e3d823..57a0c953 100644 --- a/src/pac.c +++ b/src/pac.c @@ -29,6 +29,10 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, *r_decay_stats = &pac->stats->decay_muzzy; *r_ecache = &pac->ecache_muzzy; return; + case extent_state_active: + case extent_state_retained: + case extent_state_transition: + case extent_state_merging: default: unreachable(); } @@ -385,8 +389,6 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, npurged += npages; switch (ecache->state) { - case extent_state_active: - not_reached(); case extent_state_dirty: if (try_muzzy) { err = extent_purge_lazy_wrapper(tsdn, ehooks, @@ -402,7 +404,10 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, extent_dalloc_wrapper(tsdn, pac, ehooks, edata); nunmapped += npages; break; + case extent_state_active: case extent_state_retained: + case extent_state_transition: + 
case extent_state_merging: default: not_reached(); } diff --git a/src/prof.c b/src/prof.c index 52869375..1cf49740 100644 --- a/src/prof.c +++ b/src/prof.c @@ -277,7 +277,8 @@ prof_sample_new_event_wait(tsd_t *tsd) { * otherwise bytes_until_sample would be 0 if u is exactly 1.0. */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (r == 0U) ? 1.0 : (double)r * (1.0/9007199254740992.0L); + double u = (r == 0U) ? 1.0 : (double)((long double)r * + (1.0L/9007199254740992.0L)); return (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; diff --git a/src/prof_data.c b/src/prof_data.c index 91a9268d..39af0c90 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -709,6 +709,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { case prof_tctx_state_purgatory: prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); break; + case prof_tctx_state_initializing: default: not_reached(); } @@ -764,6 +765,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { case prof_tctx_state_purgatory: ret = tctx; goto label_return; + case prof_tctx_state_initializing: default: not_reached(); } @@ -1393,6 +1395,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { destroy_tctx = false; destroy_gctx = false; break; + case prof_tctx_state_initializing: + case prof_tctx_state_purgatory: default: not_reached(); destroy_tctx = false; diff --git a/src/sz.c b/src/sz.c index d3115dda..89def9d5 100644 --- a/src/sz.c +++ b/src/sz.c @@ -100,7 +100,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) >> SC_LG_TINY_MIN); for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { - sz_size2index_tab[dst_ind] = sc_ind; + assert(sc_ind < 1 << (sizeof(uint8_t) * 8)); + sz_size2index_tab[dst_ind] = (uint8_t)sc_ind; } } } diff --git a/src/tcache.c b/src/tcache.c index 015bdb11..ca0b1acb 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ 
-250,7 +250,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); - unsigned nfill = cache_bin_ncached_max_get(cache_bin) + cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) >> tcache_slow->lg_fill_div[binind]; if (nfill == 0) { nfill = 1; @@ -529,7 +529,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); assert((cache_bin_sz_t)rem <= ncached); - unsigned nflush = ncached - rem; + cache_bin_sz_t nflush = ncached - (cache_bin_sz_t)rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); @@ -537,7 +537,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &ptrs, ncached - rem); + cache_bin_finish_flush(cache_bin, &ptrs, + ncached - (cache_bin_sz_t)rem); } void @@ -817,7 +818,8 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { (unsigned)tcache_get_default_ncached_max()[i].ncached_max: tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); - cache_bin_info_init(&tcache_bin_info[i], ncached_max); + cache_bin_info_init(&tcache_bin_info[i], + (cache_bin_sz_t)ncached_max); } } From f6fe6abdcb5372f0bad1dba0b77dedc8f95c8a78 Mon Sep 17 00:00:00 2001 From: Honggyu Kim Date: Fri, 29 Dec 2023 10:16:39 +0900 Subject: [PATCH 149/395] build: Make autogen.sh accept quoted extra options The current autogen.sh script doesn't allow receiving quoted extra options. If someone wants to pass extra CFLAGS that is split into multiple options with a whitespace, then a quote is required. However, the configure inside autogen.sh fails in this case as follows. 
$ ./autogen.sh CFLAGS="-Dmmap=cxl_mmap -Dmunmap=cxl_munmap" autoconf ./configure --enable-autogen CFLAGS=-Dmmap=cxl_mmap -Dmunmap=cxl_munmap configure: error: unrecognized option: `-Dmunmap=cxl_munmap' Try `./configure --help' for more information Error 0 in ./configure It's because the quote discarded unexpectedly when calling configure. This patch is to fix this problem. Signed-off-by: Honggyu Kim --- autogen.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autogen.sh b/autogen.sh index 75f32da6..c5325fc9 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,8 +9,8 @@ for i in autoconf; do fi done -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ +echo "./configure --enable-autogen \"$@\"" +./configure --enable-autogen "$@" if [ $? -ne 0 ]; then echo "Error $? in ./configure" exit 1 From dfb3260b97a13a90487ec74e495ca4fc684f6a44 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 11 Dec 2023 18:25:49 +0800 Subject: [PATCH 150/395] Fix missing cleanup message for collected profiles. ``` sub cleanup { unlink($main::tmpfile_sym); unlink(keys %main::tempnames); # We leave any collected profiles in $HOME/jeprof in case the user wants # to look at them later. We print a message informing them of this. if ((scalar(@main::profile_files) > 0) && defined($main::collected_profile)) { if (scalar(@main::profile_files) == 1) { print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; } print STDERR "If you want to investigate this profile further, you can do:\n"; print STDERR "\n"; print STDERR " jeprof \\\n"; print STDERR " $main::prog \\\n"; print STDERR " $main::collected_profile\n"; print STDERR "\n"; } } ``` On cleanup, it would print out a message for the collected profile. If there is only one collected profile, it would pop by L691, then `scalar(@main::profile_files)` would be 0, and no message would be printed. 
--- bin/jeprof.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index f02c1f3e..f6999ece 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -688,15 +688,15 @@ sub Main() { my $symbol_map = {}; # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $data = ReadProfile($main::prog, $main::profile_files[0]); my $profile = $data->{profile}; my $pcs = $data->{pcs}; my $libs = $data->{libs}; # Info about main program and shared libraries $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { + if (scalar(@main::profile_files) > 1) { + foreach my $pname (@main::profile_files[1..$#main::profile_files]) { my $data2 = ReadProfile($main::prog, $pname); $profile = AddProfile($profile, $data2->{profile}); $pcs = AddPcs($pcs, $data2->{pcs}); From d284aad0277dd11b7e05dcc0328cc7e6f53c7023 Mon Sep 17 00:00:00 2001 From: Minsoo Choo Date: Sat, 25 Nov 2023 09:01:29 -0500 Subject: [PATCH 151/395] Test on more FreeBSD versions Added 14.0-RELEASE Added 15-CURRENT Added 14-STABLE Added 13-STABLE 13.0-RELEASE will be updated when 13.3-RELEASE comes out. 
--- .cirrus.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index a68f3dc1..45498fb8 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -27,9 +27,22 @@ task: UNCOMMON_CONFIG: - env: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false - freebsd_instance: - matrix: - image: freebsd-13-0-release-amd64 + matrix: + - name: 15-CURRENT + freebsd_instance: + image_family: freebsd-15-0-snap + - name: 14-STABLE + freebsd_instance: + image_family: freebsd-14-0-snap + - name: 14.0-RELEASE + freebsd_instance: + image_family: freebsd-14-0 + - name: 13-STABLE + freebsd_instance: + image_family: freebsd-13-2-snap + - name: 13.0-RELEASE + freebsd_instance: + image_family: freebsd-13-0 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 3a6296e1ef2249b5bb0cffb0be47376ea0491aad Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Jan 2024 14:16:00 -0800 Subject: [PATCH 152/395] Disable FreeBSD on Travis CI since it's not working. Travis CI currently provides only FreeBSD 12 which is EOL. 
--- .travis.yml | 48 ------------------------------------------- scripts/gen_travis.py | 6 +++++- 2 files changed, 5 insertions(+), 49 deletions(-) diff --git a/.travis.yml b/.travis.yml index 85e0b720..ec1481c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,54 +34,6 @@ jobs: - os: windows arch: amd64 env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc 
CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fe4e029f..651006ca 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -309,7 +309,11 @@ def main(): jobs = '\n'.join(( generate_windows(AMD64), - generate_freebsd(AMD64), + # Travis currently provides only FreeBSD 12.1 which is EOL. Builds are + # not working as of Jan 2024. Disable the tests for now to avoid the + # noise / confusion. + + # generate_freebsd(AMD64), generate_linux(AMD64), generate_linux(PPC64LE), From 05160258df8a4e34f323b2c6eb1f2c0f59591d05 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 3 Jan 2024 11:59:02 -0800 Subject: [PATCH 153/395] When safety_check_fail, also embed hint msg in the abort function name because there are cases only logging crash stack traces. --- src/safety_check.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index 7ffe1f4f..d3f68fbc 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -20,6 +20,20 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } +/* + * In addition to malloc_write, also embed hint msg in the abort function name + * because there are cases only logging crash stack traces. 
+ */ +static void +safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(const char *buf) { + if (safety_check_abort == NULL) { + malloc_write(buf); + abort(); + } else { + safety_check_abort(buf); + } +} + void safety_check_fail(const char *format, ...) { char buf[MALLOC_PRINTF_BUFSIZE]; @@ -28,10 +42,5 @@ void safety_check_fail(const char *format, ...) { malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); va_end(ap); - if (safety_check_abort == NULL) { - malloc_write(buf); - abort(); - } else { - safety_check_abort(buf); - } + safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(buf); } From b1792c80d2870c87af79d64bcca844d19345412d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 4 Dec 2023 14:34:35 -0800 Subject: [PATCH 154/395] Add LOGs when entering and exiting free and sdallocx. --- .../internal/jemalloc_internal_inlines_c.h | 8 --- src/jemalloc.c | 17 +++++-- src/jemalloc_cpp.cpp | 51 ++++++++++++++++++- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 8b80e3c1..6dcffac9 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -278,8 +278,6 @@ fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, if (config_stats) { bin->tstats.nrequests++; } - - LOG("core.malloc.exit", "result: %p", ret); } JEMALLOC_ALWAYS_INLINE bool @@ -306,7 +304,6 @@ malloc_initialized(void) { */ JEMALLOC_ALWAYS_INLINE void * imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { - LOG("core.malloc.entry", "size: %zu", size); if (tsd_get_allocates() && unlikely(!malloc_initialized())) { return fallback_alloc(size); } @@ -578,14 +575,9 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_noflags(void *ptr, size_t size) { - 
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, - size); - if (!free_fastpath(ptr, size, true)) { sdallocx_default(ptr, size, 0); } - - LOG("core.sdallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW diff --git a/src/jemalloc.c b/src/jemalloc.c index 8fba8878..88436f45 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2730,8 +2730,6 @@ malloc_default(size_t size) { hook_invoke_alloc(hook_alloc_malloc, ret, (uintptr_t)ret, args); } - LOG("core.malloc.exit", "result: %p", ret); - return ret; } @@ -2744,7 +2742,12 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { - return imalloc_fastpath(size, &malloc_default); + LOG("core.malloc.entry", "size: %zu", size); + + void * ret = imalloc_fastpath(size, &malloc_default); + + LOG("core.malloc.exit", "result: %p", ret); + return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2835,7 +2838,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + LOG("core.calloc.entry", "num: %zu, size: %zu", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -3014,7 +3017,11 @@ je_free(void *ptr) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free_sized(void *ptr, size_t size) { - return je_sdallocx_noflags(ptr, size); + LOG("core.free_sized.entry", "ptr: %p, size: %zu", ptr, size); + + je_sdallocx_noflags(ptr, size); + + LOG("core.free_sized.exit", ""); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 08107a8a..fffd6aee 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -112,7 +112,12 @@ template JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { - return imalloc_fastpath(size, &fallbackNewImpl); + LOG("core.operator_new.entry", "size: %zu", size); + + void * ret = imalloc_fastpath(size, &fallbackNewImpl); 
+ + LOG("core.operator_new.exit", "result: %p", ret); + return ret; } void * @@ -173,21 +178,37 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_ void operator delete(void *ptr) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void *ptr) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete(void *ptr, const std::nothrow_t &) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void *ptr, const std::nothrow_t &) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } #if __cpp_sized_deallocation >= 201309 @@ -198,7 +219,11 @@ sizedDeleteImpl(void* ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } + LOG("core.operator_delete.entry", "ptr: %p, size: %zu", ptr, size); + je_sdallocx_noflags(ptr, size); + + LOG("core.operator_delete.exit", ""); } void @@ -217,34 +242,56 @@ operator delete[](void *ptr, std::size_t size) noexcept { JEMALLOC_ALWAYS_INLINE void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) + noexcept { if (config_debug) { assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); } if (unlikely(ptr == nullptr)) { return; } + LOG("core.operator_delete.entry", "ptr: %p, size: %zu, alignment: %zu", + ptr, size, alignment); + je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment)); + + LOG("core.operator_delete.exit", ""); } void operator delete(void* ptr, std::align_val_t) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator 
delete[](void* ptr, std::align_val_t) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void From a2c52674091c53f6af1ac8b7ef8849bc7797a5ad Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Jan 2024 13:07:58 -0800 Subject: [PATCH 155/395] HPA: Allow frequent reused alloc to bypass the slab_max_alloc limit, as long as it's within the huge page size. These requests do not concern internal fragmentation with huge pages, since the entire range is expected to be accessed. --- include/jemalloc/internal/pai.h | 10 ++++++---- src/hpa.c | 25 ++++++++++++++++++++----- src/pai.c | 7 ++++--- src/sec.c | 7 ++++--- test/unit/hpa.c | 19 ++++++++++++++++--- test/unit/sec.c | 2 +- 6 files changed, 51 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index dd64ee59..557d30d1 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -20,7 +20,7 @@ struct pai_s { * the results are not necessarily zeroed. 
*/ size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, @@ -50,9 +50,10 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static inline size_t pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { return self->alloc_batch(tsdn, self, size, nallocs, results, - deferred_work_generated); + frequent_reuse, deferred_work_generated); } static inline bool @@ -91,7 +92,8 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * each item in the list. */ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); /* Ditto, for dalloc. 
*/ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); diff --git a/src/hpa.c b/src/hpa.c index ee41994f..99d1f033 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -12,7 +12,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -643,7 +644,9 @@ static size_t hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { - assert(size <= shard->opts.slab_max_alloc); + assert(size <= HUGEPAGE); + assert(size <= shard->opts.slab_max_alloc || + size == sz_index2size(sz_size2index(size))); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, @@ -712,14 +715,26 @@ hpa_from_pai(pai_t *self) { static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); hpa_shard_t *shard = hpa_from_pai(self); - if (size > shard->opts.slab_max_alloc) { + /* + * frequent_use here indicates this request comes from the arena bins, + * in which case it will be split into slabs, and therefore there is no + * intrinsic slack in the allocation (the 
entire range of allocated size + * will be accessed). + * + * In this case bypass the slab_max_alloc limit (if still within the + * huge page size). These requests do not concern internal + * fragmentation with huge pages (again, the full size will be used). + */ + if (!(frequent_reuse && size <= HUGEPAGE) && + (size > shard->opts.slab_max_alloc)) { return 0; } @@ -771,7 +786,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, edata_list_active_t results; edata_list_active_init(&results); size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results, deferred_work_generated); + &results, frequent_reuse, deferred_work_generated); assert(nallocs == 0 || nallocs == 1); edata_t *edata = edata_list_active_first(&results); return edata; diff --git a/src/pai.c b/src/pai.c index 45c87729..e8cddfc3 100644 --- a/src/pai.c +++ b/src/pai.c @@ -3,12 +3,13 @@ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, frequent_reuse, + &deferred_by_alloc); *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index df675590..19d69ff4 100644 --- a/src/sec.c +++ b/src/sec.c @@ -174,14 +174,15 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin, size_t size) { + sec_bin_t *bin, size_t size, bool frequent_reuse) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); edata_list_active_t result; edata_list_active_init(&result); bool 
deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); + 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, + &deferred_work_generated); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -251,7 +252,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (edata == NULL) { if (do_batch_fill) { edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin, - size); + size, frequent_reuse); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero, /* guarded */ false, frequent_reuse, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 64aef59e..9e3160b4 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -84,12 +84,25 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - false, &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - false, false, &deferred_work_generated); + false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_not_null(edata, "Allocation of frequent reused failed"); + + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_not_null(edata, "Allocation of frequent reused failed"); + + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_null(edata, "Allocation of larger than hugepage succeeded"); + destroy_test_data(shard); } TEST_END @@ 
-273,7 +286,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { edata_list_active_t allocs_list; edata_list_active_init(&allocs_list); size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list, &deferred_work_generated); + &allocs_list, /* frequent_reuse */ false, &deferred_work_generated); expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { allocs[i] = edata_list_active_first(&allocs_list); diff --git a/test/unit/sec.c b/test/unit/sec.c index f3ec403d..0b5e1c31 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -73,7 +73,7 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, static inline size_t pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; if (ta->alloc_fail) { From f96010b7fa8ce5f83802144bdebf2bb7a6679649 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 Jan 2024 16:21:04 -0800 Subject: [PATCH 156/395] gitignore: Start ignoring clangd dirs. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 0f5e7aae..9180ddf1 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,9 @@ /src/*.[od] /src/*.sym +# These are semantically meaningful for clangd and related tooling. +/build/ +/.cache/ compile_commands.json /static_analysis_raw_results /static_analysis_results From 6d181bc1b7a99348886984754c6227002ef4542f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Feb 2024 13:34:05 -0800 Subject: [PATCH 157/395] Fix Cirrus CI. 13.0-RELEASE does not exist anymore. 
"The resource 'projects/freebsd-org-cloud-dev/global/images/family/freebsd-13-0' was not found" --- .cirrus.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 45498fb8..63a96d2a 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,9 +40,6 @@ task: - name: 13-STABLE freebsd_instance: image_family: freebsd-13-2-snap - - name: 13.0-RELEASE - freebsd_instance: - image_family: freebsd-13-0 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 1aba4f41a3fef53fa913e655444dbba53a0c82df Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 13 Feb 2024 14:37:21 -0800 Subject: [PATCH 158/395] Allow zero sized memalign to pass. Instead of failing on assertions. Previously the same change was made for posix_memalign and aligned_alloc (#1554). Make memalign behave the same way even though it's obsolete. --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 88436f45..7934e767 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3052,6 +3052,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); + sopts.bump_empty_aligned_alloc = true; sopts.min_alignment = 1; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; From 373884ab482ad1de4b839e40bd38fd154f324707 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 30 Jan 2024 10:16:28 -0800 Subject: [PATCH 159/395] print out all malloc_conf settings in stats --- .../internal/jemalloc_internal_externs.h | 3 ++ src/ctl.c | 26 +++++++++++- src/jemalloc.c | 41 ++++++++++++------- src/stats.c | 36 +++++++++++++++- test/unit/malloc_conf_2.c | 26 +++++++++++- 5 files changed, 114 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 64d9aa20..9d7a9048 100644 --- 
a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -38,6 +38,9 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; +extern const char *opt_malloc_conf_symlink; +extern const char *opt_malloc_conf_env_var; + /* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */ extern uintptr_t san_cache_bin_nonfast_mask; diff --git a/src/ctl.c b/src/ctl.c index 93144752..7c349da7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -159,6 +159,10 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) +CTL_PROTO(opt_malloc_conf_symlink) +CTL_PROTO(opt_malloc_conf_env_var) +CTL_PROTO(opt_malloc_conf_global_var) +CTL_PROTO(opt_malloc_conf_global_var_2_conf_harder) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) @@ -426,6 +430,14 @@ static const ctl_named_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; +static const ctl_named_node_t opt_malloc_conf_node[] = { + {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, + {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, + {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, + {NAME("global_var_2_conf_harder"), + CTL(opt_malloc_conf_global_var_2_conf_harder)} +}; + static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, @@ -502,7 +514,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), - CTL(opt_debug_double_free_max_scan)} + CTL(opt_debug_double_free_max_scan)}, + {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; static const ctl_named_node_t tcache_node[] = { @@ -2230,6 +2243,17 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, CTL_RO_NL_GEN(opt_zero_realloc, 
zero_realloc_mode_names[opt_zero_realloc_action], const char *) +/* malloc_conf options */ +CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, + opt_malloc_conf_symlink, const char *) +CTL_RO_NL_CGEN(opt_malloc_conf_env_var, opt_malloc_conf_env_var, + opt_malloc_conf_env_var, const char *) +CTL_RO_NL_CGEN(je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, + const char *) +CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, + opt_malloc_conf_global_var_2_conf_harder, je_malloc_conf_2_conf_harder, + const char *) + /******************************************************************************/ static int diff --git a/src/jemalloc.c b/src/jemalloc.c index 7934e767..68c0e7eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -57,6 +57,9 @@ const char *je_malloc_conf_2_conf_harder #endif ; +const char *opt_malloc_conf_symlink = NULL; +const char *opt_malloc_conf_env_var = NULL; + bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -955,7 +958,7 @@ malloc_slow_flag_init(void) { #define MALLOC_CONF_NSOURCES 5 static const char * -obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { +obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { if (config_debug) { static unsigned read_source = 0; /* @@ -998,9 +1001,9 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { * link's name. */ #ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, buf, PATH_MAX); + linklen = readlink(linkname, readlink_buf, PATH_MAX); #else - linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX); + linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); #endif if (linklen == -1) { /* No configuration specified. 
*/ @@ -1009,8 +1012,8 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { set_errno(saved_errno); } #endif - buf[linklen] = '\0'; - ret = buf; + readlink_buf[linklen] = '\0'; + ret = readlink_buf; break; } case 3: { const char *envname = @@ -1022,10 +1025,7 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { ; if ((ret = jemalloc_getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to the value - * of the MALLOC_CONF environment variable. - */ + opt_malloc_conf_env_var = ret; } else { /* No configuration specified. */ ret = NULL; @@ -1084,7 +1084,7 @@ validate_hpa_settings(void) { static void malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], - char buf[PATH_MAX + 1]) { + char readlink_buf[PATH_MAX + 1]) { static const char *opts_explain[MALLOC_CONF_NSOURCES] = { "string specified via --with-malloc-conf", "string pointed to by the global variable malloc_conf", @@ -1101,7 +1101,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { /* Get runtime configuration. 
*/ if (initial_call) { - opts_cache[i] = obtain_malloc_conf(i, buf); + opts_cache[i] = obtain_malloc_conf(i, readlink_buf); } opts = opts_cache[i]; if (!initial_call && opt_confirm_conf) { @@ -1783,13 +1783,13 @@ malloc_conf_init_check_deps(void) { } static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]) { const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, NULL}; - char buf[PATH_MAX + 1]; /* The first call only set the confirm_conf option and opts_cache */ - malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); + malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); if (malloc_conf_init_check_deps()) { @@ -1855,7 +1855,9 @@ malloc_init_hard_a0_locked(void) { if (config_prof) { prof_boot0(); } - malloc_conf_init(&sc_data, bin_shard_sizes); + char readlink_buf[PATH_MAX + 1]; + readlink_buf[0] = '\0'; + malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf); san_init(opt_lg_san_uaf_align); sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); @@ -1949,6 +1951,15 @@ malloc_init_hard_a0_locked(void) { malloc_init_state = malloc_init_a0_initialized; + size_t buf_len = strlen(readlink_buf); + if (buf_len > 0) { + void *readlink_allocated = a0ialloc(buf_len + 1, false, true); + if (readlink_allocated != NULL) { + memcpy(readlink_allocated, readlink_buf, buf_len + 1); + opt_malloc_conf_symlink = readlink_allocated; + } + } + return false; } diff --git a/src/stats.c b/src/stats.c index c580b49e..428e8ffb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1473,6 +1473,40 @@ stats_general_print(emitter_t *emitter) { emitter_dict_begin(emitter, "opt", "Run-time option settings"); + /* + * opt.malloc_conf. 
+ * + * Sources are documented in https://jemalloc.net/jemalloc.3.html#tuning + * - (Not Included Here) The string specified via --with-malloc-conf, + * which is already printed out above as config.malloc_conf + * - (Included) The string pointed to by the global variable malloc_conf + * - (Included) The “name” of the file referenced by the symbolic link + * named /etc/malloc.conf + * - (Included) The value of the environment variable MALLOC_CONF + * - (Optional, Unofficial) The string pointed to by the global variable + * malloc_conf_2_conf_harder, which is hidden from the public. + * + * Note: The outputs are strictly ordered by priorities (low -> high). + * + */ +#define MALLOC_CONF_WRITE(name, message) \ + if (je_mallctl("opt.malloc_conf."name, (void *)&cpv, &cpsz, NULL, 0) != \ + 0) { \ + cpv = ""; \ + } \ + emitter_kv(emitter, name, message, emitter_type_string, &cpv); + + MALLOC_CONF_WRITE("global_var", "Global variable malloc_conf"); + MALLOC_CONF_WRITE("symlink", "Symbolic link malloc.conf"); + MALLOC_CONF_WRITE("env_var", "Environment variable MALLOC_CONF"); + /* As this config is unofficial, skip the output if it's NULL */ + if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", + (void *)&cpv, &cpsz, NULL, 0) == 0) { + emitter_kv(emitter, "global_var_2_conf_harder", "Global " + "variable malloc_conf_2_conf_harder", emitter_type_string, &cpv); + } +#undef MALLOC_CONF_WRITE + OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") OPT_WRITE_BOOL("cache_oblivious") @@ -1554,7 +1588,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") - emitter_dict_end(emitter); + emitter_dict_end(emitter); /* Close "opt". 
*/ #undef OPT_WRITE #undef OPT_WRITE_MUTABLE diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index ecfa4991..9d2c6077 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -22,8 +22,32 @@ TEST_BEGIN(test_malloc_conf_2) { } TEST_END +TEST_BEGIN(test_mallctl_global_var) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker trickery. */ + test_skip_if(windows); + + const char *mc; + size_t sz = sizeof(mc); + expect_d_eq(mallctl("opt.malloc_conf.global_var", + (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf, "Unexpected value for the global variable " + "malloc_conf"); + + expect_d_eq(mallctl("opt.malloc_conf.global_var_2_conf_harder", + (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf_2_conf_harder, "Unexpected value for the " + "global variable malloc_conf_2_conf_harder"); +} +TEST_END + int main(void) { return test( - test_malloc_conf_2); + test_malloc_conf_2, + test_mallctl_global_var); } From ed9b00a96b25ea24e90875d7a79cdbf3411dd53b Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 4 Mar 2024 14:50:39 +0800 Subject: [PATCH 160/395] Replace unsigned induction variable with size_t in background_threads_enable This patch avoids unnecessary vectorizations in clang and missed recognition of memset in gcc. See also https://godbolt.org/z/aoeMsjr4c. 
--- src/background_thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 94d91a89..c92fa2bc 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -580,7 +580,7 @@ background_threads_enable(tsd_t *tsd) { VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned nmarked; - for (unsigned i = 0; i < max_background_threads; i++) { + for (size_t i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; From 1978e5cdac731dca43b62e4b03612c0758f7cece Mon Sep 17 00:00:00 2001 From: Minsoo Choo Date: Sat, 9 Mar 2024 00:46:31 -0500 Subject: [PATCH 161/395] Update actions/checkout and actions/upload-artifact to v4 --- .github/workflows/check_formatting.yaml | 2 +- .github/workflows/static_analysis.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml index f7be77b1..8a10065f 100644 --- a/.github/workflows/check_formatting.yaml +++ b/.github/workflows/check_formatting.yaml @@ -5,6 +5,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check for trailing whitespace run: scripts/check_trailing_whitespace.sh diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml index df60b5a1..29e617fc 100644 --- a/.github/workflows/static_analysis.yaml +++ b/.github/workflows/static_analysis.yaml @@ -7,7 +7,7 @@ jobs: # We build libunwind ourselves because sadly the version # provided by Ubuntu via apt-get is much too old. - name: Check out libunwind - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: libunwind/libunwind path: libunwind @@ -23,7 +23,7 @@ jobs: cd ..
rm -rf libunwind - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # We download LLVM directly from the latest stable release # on GitHub, because this tends to be much newer than the # version available via apt-get in Ubuntu. @@ -54,7 +54,7 @@ jobs: scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT" - name: Upload static analysis results if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }} == '1' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: static_analysis_results path: static_analysis_results From 10d713151d7245ae89657a7002a5988522b7bd7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 25 Oct 2023 01:01:22 +0000 Subject: [PATCH 162/395] Ensure that the root of a heap is always the best element. --- include/jemalloc/internal/ph.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 3ae38710..830ccb7e 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -239,7 +239,7 @@ ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { phn_prev_set(phn, NULL, offset); phn = phn_merge_siblings(phn, offset, cmp); assert(phn_next_get(phn, offset) == NULL); - ph->root = phn_merge(ph->root, phn, offset, cmp); + phn_merge_ordered(ph->root, phn, offset, cmp); } } @@ -380,20 +380,9 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { if (ph->root == phn) { - /* - * We can delete from aux list without merging it, but we need - * to merge if we are dealing with the root node and it has - * children. 
- */ - if (phn_lchild_get(phn, offset) == NULL) { - ph->root = phn_next_get(phn, offset); - return; - } ph_merge_aux(ph, offset, cmp); - if (ph->root == phn) { - ph->root = ph_merge_children(ph->root, offset, cmp); - return; - } + ph->root = ph_merge_children(phn, offset, cmp); + return; } void* prev = phn_prev_get(phn, offset); From 92aa52c0625d35ca1c30e7fc913d7c92c9518f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 25 Oct 2023 00:36:08 +0000 Subject: [PATCH 163/395] Reduce nesting in phn_merge_siblings using an early return. --- include/jemalloc/internal/ph.h | 105 +++++++++++++++++---------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 830ccb7e..ef9634be 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -162,6 +162,10 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { void *phn0 = phn; void *phn1 = phn_next_get(phn0, offset); + if (phn1 == NULL) { + return phn0; + } + /* * Multipass merge, wherein the first two elements of a FIFO * are repeatedly merged, and each result is appended to the @@ -170,62 +174,61 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { * its tail, so we do a single pass over the sibling list to * populate the FIFO. 
*/ - if (phn1 != NULL) { - void *phnrest = phn_next_get(phn1, offset); - if (phnrest != NULL) { - phn_prev_set(phnrest, NULL, offset); - } - phn_prev_set(phn0, NULL, offset); - phn_next_set(phn0, NULL, offset); - phn_prev_set(phn1, NULL, offset); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - head = tail = phn0; - phn0 = phnrest; - while (phn0 != NULL) { - phn1 = phn_next_get(phn0, offset); - if (phn1 != NULL) { - phnrest = phn_next_get(phn1, offset); - if (phnrest != NULL) { - phn_prev_set(phnrest, NULL, offset); - } - phn_prev_set(phn0, NULL, offset); - phn_next_set(phn0, NULL, offset); - phn_prev_set(phn1, NULL, offset); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = phnrest; - } else { - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = NULL; - } - } - phn0 = head; + void *phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + head = tail = phn0; + phn0 = phnrest; + while (phn0 != NULL) { phn1 = phn_next_get(phn0, offset); if (phn1 != NULL) { - while (true) { - head = phn_next_get(phn1, offset); - assert(phn_prev_get(phn0, offset) == NULL); - phn_next_set(phn0, NULL, offset); - assert(phn_prev_get(phn1, offset) == NULL); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - if (head == NULL) { - break; - } - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = head; - phn1 = phn_next_get(phn0, offset); + phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + 
phn_prev_set(phnrest, NULL, offset); } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = phnrest; + } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = NULL; } } + phn0 = head; + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + while (true) { + head = phn_next_get(phn1, offset); + assert(phn_prev_get(phn0, offset) == NULL); + phn_next_set(phn0, NULL, offset); + assert(phn_prev_get(phn1, offset) == NULL); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + if (head == NULL) { + break; + } + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = head; + phn1 = phn_next_get(phn0, offset); + } + } + return phn0; } From b2e59a96e1ffc953300c5b69ffae934a63de38c0 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 25 Mar 2024 08:02:49 -0700 Subject: [PATCH 164/395] Introduce getters for page allocator shard stats Access nactive, ndirty and nmuzzy through getters and not directly. There are no functional changes, but getters are required to propagate HPA's statistics up to Page Allocator's statistics. 
--- include/jemalloc/internal/pa.h | 4 ++++ src/pa.c | 2 +- src/pa_extra.c | 25 ++++++++++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5f43244d..75626738 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -224,6 +224,10 @@ void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); +size_t pa_shard_nactive(pa_shard_t *shard); +size_t pa_shard_ndirty(pa_shard_t *shard); +size_t pa_shard_nmuzzy(pa_shard_t *shard); + void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); diff --git a/src/pa.c b/src/pa.c index ebc6861d..7a24ae65 100644 --- a/src/pa.c +++ b/src/pa.c @@ -11,7 +11,7 @@ pa_nactive_add(pa_shard_t *shard, size_t add_pages) { static void pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { - assert(atomic_load_zu(&shard->nactive, ATOMIC_RELAXED) >= sub_pages); + assert(pa_shard_nactive(shard) >= sub_pages); atomic_fetch_sub_zu(&shard->nactive, sub_pages, ATOMIC_RELAXED); } diff --git a/src/pa_extra.c b/src/pa_extra.c index 0f488be6..ee101891 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -74,12 +74,27 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { } } +size_t +pa_shard_nactive(pa_shard_t *shard) { + return atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); +} + +size_t +pa_shard_ndirty(pa_shard_t *shard) { + return ecache_npages_get(&shard->pac.ecache_dirty); +} + +size_t +pa_shard_nmuzzy(pa_shard_t *shard) { + return ecache_npages_get(&shard->pac.ecache_muzzy); +} + void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { - *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&shard->pac.ecache_dirty); - *nmuzzy += 
ecache_npages_get(&shard->pac.ecache_muzzy); + *nactive += pa_shard_nactive(shard); + *ndirty += pa_shard_ndirty(shard); + *nmuzzy += pa_shard_nmuzzy(shard); } void @@ -95,8 +110,8 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; - resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - resident_pgs += ecache_npages_get(&shard->pac.ecache_dirty); + resident_pgs += pa_shard_nactive(shard); + resident_pgs += pa_shard_ndirty(shard); *resident += (resident_pgs << LG_PAGE); /* Dirty decay stats */ From 268e8ee880bcb67163eda4c4f43c06697b28a436 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 25 Mar 2024 09:23:03 -0700 Subject: [PATCH 165/395] Include HPA ndirty into page allocator ndirty stat --- src/pa_extra.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pa_extra.c b/src/pa_extra.c index ee101891..76507039 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -81,7 +81,11 @@ pa_shard_nactive(pa_shard_t *shard) { size_t pa_shard_ndirty(pa_shard_t *shard) { - return ecache_npages_get(&shard->pac.ecache_dirty); + size_t ndirty = ecache_npages_get(&shard->pac.ecache_dirty); + if (shard->ever_used_hpa) { + ndirty += psset_ndirty(&shard->hpa_shard.psset); + } + return ndirty; } size_t From 38056fea64c34ca4fef0a16212776eaa4de80b78 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 19 Mar 2024 16:46:56 +0900 Subject: [PATCH 166/395] Set errno to ENOMEM on rallocx() OOM failures realloc() and rallocx() shares path, and realloc() should set errno to ENOMEM upon OOM failures. 
Fixes: ee961c23100e ("Merge realloc and rallocx pathways.") Signed-off-by: Juhyung Park --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 68c0e7eb..4e77894c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3493,6 +3493,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { return p; label_oom: + set_errno(ENOMEM); if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); From 5081c16bb49a0c9d1dde3cbd7dfb2e97c2827ea4 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 3 Apr 2024 13:27:11 -0700 Subject: [PATCH 167/395] Experimental calloc implementation with using memset on larger sizes --- .../internal/jemalloc_internal_externs.h | 1 + src/arena.c | 35 +++++++++++++++---- src/jemalloc.c | 5 +++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 9d7a9048..41c0f366 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -37,6 +37,7 @@ extern const char *const zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; +extern size_t opt_calloc_madvise_threshold; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/src/arena.c b/src/arena.c index 746ab328..8c87d67f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -359,18 +359,39 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), esize, alignment); - edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, guarded, &deferred_work_generated); - if (edata != NULL) { - if (config_stats) { - arena_large_malloc_stats_update(tsdn, 
arena, usize); - } + /* + * - if usize >= opt_calloc_madvise_threshold, + * - pa_alloc(..., zero_override = zero, ...) + * - otherwise, + * - pa_alloc(..., zero_override = false, ...) + * - use memset() to zero out memory if zero == true. + */ + bool zero_override = zero && (usize >= opt_calloc_madvise_threshold); + edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, + /* slab */ false, szind, zero_override, guarded, + &deferred_work_generated); + + if (edata == NULL) { + return NULL; } - if (edata != NULL && sz_large_pad != 0) { + if (config_stats) { + arena_large_malloc_stats_update(tsdn, arena, usize); + } + if (sz_large_pad != 0) { arena_cache_oblivious_randomize(tsdn, arena, edata, alignment); } + /* + * This branch should be put after the randomization so that the addr + * returned by edata_addr_get() has already be randomized, + * if cache_oblivious is enabled. + */ + if (zero && !zero_override && !edata_zeroed_get(edata)) { + void *addr = edata_addr_get(edata); + size_t usize = edata_usize_get(edata); + memset(addr, 0, usize); + } return edata; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e77894c..0362f539 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -160,6 +160,8 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; +size_t opt_calloc_madvise_threshold = 0; + /* Protects arenas initialization. 
*/ static malloc_mutex_t arenas_lock; @@ -1453,6 +1455,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "debug_double_free_max_scan", 0, UINT_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) + CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, + "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ false) /* * The runtime option of oversize_threshold remains From 83b075789b4239035931c1ee212576d00153bbf0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Apr 2024 11:33:55 -0700 Subject: [PATCH 168/395] rallocx path: only set errno on the realloc case. --- src/jemalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0362f539..d83c191f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3498,7 +3498,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { return p; label_oom: - set_errno(ENOMEM); + if (is_realloc) { + set_errno(ENOMEM); + } if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); From 11038ff762a2ba11eec26d3ffb32026424d2ccfe Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 23 Dec 2023 19:40:44 -0500 Subject: [PATCH 169/395] Add support for namespace pids in heap profile names This change adds support for writing pid namespaces to the filename of a heap profile. When running with namespaces pids may reused across namespaces and if mounts are shared where profiles are written there is not a great way to differentiate profiles between pids. 
Signed-off-by: Daniel Hodges Signed-off-by: Daniel Hodges --- doc/jemalloc.xml.in | 17 ++++++ include/jemalloc/internal/prof_externs.h | 3 + src/ctl.c | 4 ++ src/jemalloc.c | 1 + src/prof.c | 1 + src/prof_sys.c | 76 ++++++++++++++++++++---- test/unit/mallctl.c | 1 + 7 files changed, 93 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d0d4b20b..89a176e0 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1514,6 +1514,23 @@ malloc_conf = "xmalloc:true";]]> by default. + + + opt.prof_pid_namespace + (bool) + r- + [] + + Enable adding the pid namespace to the profile + filename. Profiles are dumped to files named according to the pattern + <prefix>.<pid_namespace>.<pid>.<seq>.i<iseq>.heap, + where <prefix> is controlled by the opt.prof_prefix and + prof.prefix + options. + + + opt.lg_prof_interval diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 514c5804..952ace7d 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -26,6 +26,9 @@ extern char opt_prof_prefix[ 1]; extern bool opt_prof_unbias; +/* Include pid namespace in profile file names. 
*/ +extern bool opt_prof_pid_namespace; + /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; diff --git a/src/ctl.c b/src/ctl.c index 7c349da7..37b69576 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -153,6 +153,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_leak_error) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_prof_pid_namespace) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) @@ -507,6 +508,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, @@ -2226,6 +2228,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, + bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index d83c191f..390912ba 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1628,6 +1628,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_leak_error, "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, "prof_pid_namespace") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) 
CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") diff --git a/src/prof.c b/src/prof.c index 1cf49740..6ae7f768 100644 --- a/src/prof.c +++ b/src/prof.c @@ -34,6 +34,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_leak_error = false; bool opt_prof_accum = false; +bool opt_prof_pid_namespace = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; bool opt_prof_unbias = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index c2998926..1e22332c 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -484,6 +484,41 @@ prof_getpid(void) { #endif } +long +prof_get_pid_namespace() { + long ret = 0; + +#if defined(_WIN32) || defined(__APPLE__) + // Not supported, do nothing. +#else + char buf[PATH_MAX]; + const char* linkname = +# if defined(__FreeBSD__) || defined(__DragonFly__) + "/proc/curproc/ns/pid" +# else + "/proc/self/ns/pid" +# endif + ; + ssize_t linklen = +# ifndef JEMALLOC_READLINKAT + readlink(linkname, buf, PATH_MAX) +# else + readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) +# endif + ; + + // namespace string is expected to be like pid:[4026531836] + if (linklen > 0) { + // Trim the trailing "]" + buf[linklen-1] = '\0'; + char* index = strtok(buf, "pid:["); + ret = atol(index); + } +#endif + + return ret; +} + /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps; protected by prof_dump_mtx. 
@@ -713,15 +748,30 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { const char *prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), - prof_dump_seq, v, vseq); + if (opt_prof_pid_namespace) { + /* "....v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%ld.%d.%"FMTu64".%c%"FMTu64".heap", prefix, + prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v, + vseq); + } else { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), + prof_dump_seq, v, vseq); + } } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), - prof_dump_seq, v); + if (opt_prof_pid_namespace) { + /* ".....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%ld.%d.%"FMTu64".%c.heap", prefix, + prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), + prof_dump_seq, v); + } } prof_dump_seq++; } @@ -729,8 +779,14 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + if (opt_prof_pid_namespace) { + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%ld.%d.%"FMTu64".json", prof_prefix_get(tsdn), + prof_get_pid_namespace(), prof_getpid(), ind); + } else { + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + } malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } diff --git 
a/test/unit/mallctl.c b/test/unit/mallctl.c index 1ff8b564..9e5baff0 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -317,6 +317,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); + TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); From 4b555c11a54d31ba941d996011c7063b2083a12e Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Mon, 4 Dec 2023 12:07:54 -0800 Subject: [PATCH 170/395] Enable heap profiling on MacOS --- bin/jeprof.in | 71 ++++++++++++++++++++++++++++++++++++++++- src/prof_sys.c | 67 ++++++++++++++++++++++++++++++++++++++ test/unit/prof_gdump.sh | 2 +- test/unit/prof_mdump.c | 1 + 4 files changed, 139 insertions(+), 2 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index f6999ece..7aff8643 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -88,6 +88,7 @@ my %obj_tool_map = ( #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables #"addr2line_pdb" => "addr2line-pdb", # ditto #"otool" => "otool", # equivalent of objdump on OS X + #"dyld_info" => "dyld_info", # equivalent of otool on OS X for shared cache ); # NOTE: these are lists, so you can put in commandline flags if you want. my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local @@ -4661,7 +4662,65 @@ sub ParseTextSectionHeaderFromOtool { return $r; } +# Parse text section header of a library in OS X shared cache using dyld_info +sub ParseTextSectionHeaderFromDyldInfo { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get dyld_info output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
+ my $cmd = ShellEscape($obj_tool_map{"dyld_info"}, "-segments", $lib); + open(DYLD, "$cmd |") || error("$cmd: $!\n"); + + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # -segments: + # load-address segment section sect-size seg-size perm + # 0x1803E0000 __TEXT 112KB r.x + # 0x1803E4F34 __text 80960 + # 0x1803F8B74 __auth_stubs 768 + # 0x1803F8E74 __init_offsets 4 + # 0x1803F8E78 __gcc_except_tab 1180 + my @x = split; + if ($#x >= 2) { + if ($x[0] eq 'load-offset') { + # dyld_info should only be used for the shared lib. + return undef; + } elsif ($x[1] eq '__TEXT') { + $file_offset = $x[0]; + } elsif ($x[1] eq '__text') { + $size = $x[2]; + $vma = $x[0]; + $file_offset = AddressSub($x[0], $file_offset); + last; + } + } + } + close(DYLD); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + sub ParseTextSectionHeader { + # obj_tool_map("dyld_info") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"dyld_info"})) { + my $r = ParseTextSectionHeaderFromDyldInfo(@_); + if (defined($r)){ + return $r; + } + } + # if dyld_info doesn't work, or we don't have it, fall back to otool # obj_tool_map("otool") is only defined if we're in a Mach-O environment if (defined($obj_tool_map{"otool"})) { my $r = ParseTextSectionHeaderFromOtool(@_); @@ -4702,7 +4761,7 @@ sub ParseLibraries { $offset = HexExtend($3); $lib = $4; $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths - } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.(so|dll|dylib|bundle)(\.\d+)*)/) { # Cooked line from DumpAddressMap. 
Example: # 40000000-40015000: /lib/ld-2.3.2.so $start = HexExtend($1); @@ -4719,6 +4778,15 @@ sub ParseLibraries { $offset = HexExtend($3); $lib = $4; $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif (($l =~ /^\s*($h)-($h):\s*(\S+)/) && ($3 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable from + # DumpAddressMap as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; } # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) @@ -5249,6 +5317,7 @@ sub ConfigureObjTools { if ($file_type =~ /Mach-O/) { # OS X uses otool to examine Mach-O files, rather than objdump. $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"dyld_info"} = "dyld_info"; $obj_tool_map{"addr2line"} = "false"; # no addr2line $obj_tool_map{"objdump"} = "false"; # no objdump } diff --git a/src/prof_sys.c b/src/prof_sys.c index 1e22332c..8a904040 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -605,6 +605,72 @@ prof_dump_close(prof_dump_arg_t *arg) { } } +#ifdef __APPLE__ +#include + +#ifdef __LP64__ +typedef struct mach_header_64 mach_header_t; +typedef struct segment_command_64 segment_command_t; +#define MH_MAGIC_VALUE MH_MAGIC_64 +#define MH_CIGAM_VALUE MH_CIGAM_64 +#define LC_SEGMENT_VALUE LC_SEGMENT_64 +#else +typedef struct mach_header mach_header_t; +typedef struct segment_command segment_command_t; +#define MH_MAGIC_VALUE MH_MAGIC +#define MH_CIGAM_VALUE MH_CIGAM +#define LC_SEGMENT_VALUE LC_SEGMENT +#endif + +static void +prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { + const mach_header_t *header = (const mach_header_t *) + _dyld_get_image_header(image_index); + if (header == NULL || (header->magic != MH_MAGIC_VALUE && + header->magic != MH_CIGAM_VALUE)) { + // Invalid header + return; 
+ } + + intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); + const char *name = _dyld_get_image_name(image_index); + struct load_command *load_cmd = (struct load_command *) + ((char *)header + sizeof(mach_header_t)); + for (uint32_t i = 0; load_cmd && (i < header->ncmds); i++) { + if (load_cmd->cmd == LC_SEGMENT_VALUE) { + const segment_command_t *segment_cmd = + (const segment_command_t *)load_cmd; + if (!strcmp(segment_cmd->segname, "__TEXT")) { + char buffer[PATH_MAX + 1]; + malloc_snprintf(buffer, sizeof(buffer), + "%016llx-%016llx: %s\n", segment_cmd->vmaddr + slide, + segment_cmd->vmaddr + slide + segment_cmd->vmsize, name); + buf_writer_cb(buf_writer, buffer); + return; + } + } + load_cmd = + (struct load_command *)((char *)load_cmd + load_cmd->cmdsize); + } +} + +static void +prof_dump_dyld_maps(buf_writer_t *buf_writer) { + uint32_t image_count = _dyld_image_count(); + for (uint32_t i = 0; i < image_count; i++) { + prof_dump_dyld_image_vmaddr(buf_writer, i); + } +} + +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = NULL; + +static void +prof_dump_maps(buf_writer_t *buf_writer) { + buf_writer_cb(buf_writer, "\nMAPPED_LIBRARIES:\n"); + /* No proc map file to read on MacOS, dump dyld maps for backtrace. 
*/ + prof_dump_dyld_maps(buf_writer); +} +#else /* !__APPLE__ */ #ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int @@ -670,6 +736,7 @@ prof_dump_maps(buf_writer_t *buf_writer) { buf_writer_pipe(buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } +#endif /* __APPLE__ */ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, diff --git a/test/unit/prof_gdump.sh b/test/unit/prof_gdump.sh index 3f600d20..a0b91dff 100644 --- a/test/unit/prof_gdump.sh +++ b/test/unit/prof_gdump.sh @@ -1,6 +1,6 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true" + export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index bcbb961a..0559339e 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -166,6 +166,7 @@ expect_maps_write_failure(int count) { TEST_BEGIN(test_mdump_maps_error) { test_skip_if(!config_prof); test_skip_if(!config_debug); + test_skip_if(prof_dump_open_maps == NULL); prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; From 630434bb0ac619f7beec927569782d924c459385 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 30 Jan 2024 01:38:08 +0800 Subject: [PATCH 171/395] Fixed type error with allocated that caused incorrect printing on 32bit --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 428e8ffb..026a4f54 100644 --- a/src/stats.c +++ b/src/stats.c @@ -324,7 +324,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, allocated, NULL, right, 13, uint64) + COL_HDR(row, allocated, NULL, right, 13, size) COL_HDR(row, nmalloc, NULL, right, 13, uint64) COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, ndalloc, 
NULL, right, 13, uint64) From fa451de17fff73cc03c31ec8cd817d62927d1ff9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Apr 2024 11:08:39 -0700 Subject: [PATCH 172/395] Fix the tcache flush sanity checking around ncached and nstashed. When there were many items stashed, it's possible that after flushing stashed, ncached is already lower than the remain, in which case the flush can simply return at that point. --- src/tcache.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index ca0b1acb..f9235541 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -524,11 +524,25 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + assert(rem <= cache_bin_ncached_max_get(cache_bin)); assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); + cache_bin_sz_t orig_nstashed = cache_bin_nstashed_get_local(cache_bin); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); - assert((cache_bin_sz_t)rem <= ncached); + assert((cache_bin_sz_t)rem <= ncached + orig_nstashed); + if ((cache_bin_sz_t)rem > ncached) { + /* + * The flush_stashed above could have done enough flushing, if + * there were many items stashed. Validate that: 1) non zero + * stashed, and 2) bin stack has available space now. + */ + assert(orig_nstashed > 0); + assert(ncached + cache_bin_nstashed_get_local(cache_bin) + < cache_bin_ncached_max_get(cache_bin)); + /* Still go through the flush logic for stats purpose only. 
*/ + rem = ncached; + } cache_bin_sz_t nflush = ncached - (cache_bin_sz_t)rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); @@ -537,8 +551,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &ptrs, - ncached - (cache_bin_sz_t)rem); + cache_bin_finish_flush(cache_bin, &ptrs, nflush); } void From 47d69b4eabae199fa8b5d948f0043effccfbc31e Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 4 Apr 2024 10:02:08 -0700 Subject: [PATCH 173/395] HPA: Fix infinite purging loop One of the conditions to start purging is `hpa_hugify_blocked_by_ndirty` function call returns true. This can happen in cases where we have no dirty memory for this shard at all. In this case purging loop will be an infinite loop. `hpa_hugify_blocked_by_ndirty` was introduced at 0f6c420, but at that time purging loop had a different form and additional `break` was not required. Purging loop form was re-written at 6630c5989, but additional exit condition wasn't added there at the time. Repro code was shared by Patrik Dokoupil at [1], I stripped it down to a minimum to reproduce the issue in jemalloc unit tests. [1]: https://github.com/jemalloc/jemalloc/pull/2533 --- src/hpa.c | 11 +++++++++-- test/unit/hpa.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 99d1f033..6b1ae2ce 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -537,9 +537,16 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, purged = false; while (hpa_should_purge(tsdn, shard) && nops < max_ops) { purged = hpa_try_purge(tsdn, shard); - if (purged) { - nops++; + if (!purged) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. 
+ */ + break; } + nops++; } hugified = hpa_try_hugify(tsdn, shard); if (hugified) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 9e3160b4..a8a26e13 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -24,7 +24,7 @@ struct test_data_s { static hpa_shard_opts_t test_hpa_shard_opts_default = { /* slab_max_alloc */ ALLOC_MAX, - /* hugification threshold */ + /* hugification_threshold */ HUGEPAGE, /* dirty_mult */ FXP_INIT_PERCENT(25), @@ -36,6 +36,21 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 5 * 1000, }; +static hpa_shard_opts_t test_hpa_shard_opts_purge = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* min_purge_interval_ms */ + 5 * 1000, +}; + static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; @@ -452,6 +467,36 @@ TEST_BEGIN(test_defer_time) { } TEST_END +TEST_BEGIN(test_purge_no_infinite_loop) { + test_skip_if(!hpa_supported()); + + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_purge); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* + * This is not arbitrary value, it is chosen to met hugification + * criteria for huge page and at the same time do not allow hugify page + * without triggering a purge. 
+ */ + const size_t npages = + test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t size = npages * PAGE; + + bool deferred_work_generated = false; + edata_t *edata = pai_alloc(tsdn, &shard->pai, size, PAGE, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + + hpa_shard_do_deferred_work(tsdn, shard); + + /* hpa_shard_do_deferred_work should not stuck in a purging loop */ + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -470,5 +515,6 @@ main(void) { test_alloc_max, test_stress, test_alloc_dalloc_batch, - test_defer_time); + test_defer_time, + test_purge_no_infinite_loop); } From 8d8379da443f46dc976252b968cb9ca8e63ec974 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 May 2024 12:53:42 -0700 Subject: [PATCH 174/395] Fix background_thread creation for the oversize_arena. Bypassing background thread creation for the oversize_arena used to be an optimization since that arena had eager purging. However #2466 changed the purging policy for the oversize_arena -- specifically it switched to the default decay time when background_thread is enabled. This issue is noticable when the number of arenas is low: whenever the total # of arenas is <= 4 (which is the default max # of background threads), in which case the purging will be stalled since no background thread is created for the oversize_arena. 
--- include/jemalloc/internal/arena_externs.h | 1 - src/arena.c | 8 -------- src/ctl.c | 11 ----------- src/jemalloc.c | 7 ++----- 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f91bd888..3d0329fc 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -103,7 +103,6 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(arena_t *a0); -bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); diff --git a/src/arena.c b/src/arena.c index 8c87d67f..1e5289e4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1867,14 +1867,6 @@ arena_init_huge(arena_t *a0) { return huge_enabled; } -bool -arena_is_huge(unsigned arena_ind) { - if (huge_arena_ind == 0) { - return false; - } - return (arena_ind == huge_arena_ind); -} - bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); diff --git a/src/ctl.c b/src/ctl.c index 37b69576..3b90aa15 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2985,17 +2985,6 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (arena_is_huge(arena_ind) && *(ssize_t *)newp > 0) { - /* - * By default the huge arena purges eagerly. If it is - * set to non-zero decay time afterwards, background - * thread might be needed. 
- */ - if (background_thread_create(tsd, arena_ind)) { - ret = EFAULT; - goto label_return; - } - } if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, *(ssize_t *)newp)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 390912ba..77407714 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -430,11 +430,8 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } - /* - * Avoid creating a new background thread just for the huge arena, which - * purges eagerly by default. - */ - if (have_background_thread && !arena_is_huge(ind)) { + + if (have_background_thread) { if (background_thread_create(tsdn_tsd(tsdn), ind)) { malloc_printf(": error in background thread " "creation for arena %u. Abort.\n", ind); From 5afff2e44e8d31ef1e9eb01d6b1327fe111835ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 May 2024 00:31:49 +0000 Subject: [PATCH 175/395] Simplify the logic in tcache_gc_small. --- src/tcache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index f9235541..2114ff95 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -154,11 +154,10 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, sizeof(nflush_uint8)); tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; return; - } else { - tcache_slow->bin_flush_delay_items[szind] - = tcache_gc_item_delay_compute(szind); } + tcache_slow->bin_flush_delay_items[szind] + = tcache_gc_item_delay_compute(szind); tcache_bin_flush_small(tsd, tcache, cache_bin, szind, (unsigned)(ncached - nflush)); From 86f4851f5d1242f4a17f78afeb4b974be5b2b1aa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 21 May 2024 15:52:57 -0700 Subject: [PATCH 176/395] Add clang static analyzer suppression macro. 
--- include/jemalloc/internal/jemalloc_internal_macros.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 40df5feb..407e868a 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -116,6 +116,18 @@ # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif +#ifdef __clang_analyzer__ +# define JEMALLOC_CLANG_ANALYZER +#endif + +#ifdef JEMALLOC_CLANG_ANALYZER +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) +#endif + #define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ JEMALLOC_DIAGNOSTIC_PUSH \ JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ From 70c94d7474c3c4f4b61303f042727d2dab66ad07 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 22 Jan 2024 15:34:58 -0800 Subject: [PATCH 177/395] Add batcher module. This can be used to batch up simple operation commands for later use by another thread. 
--- Makefile.in | 2 + include/jemalloc/internal/batcher.h | 44 ++++ include/jemalloc/internal/witness.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/batcher.c | 86 ++++++ test/unit/batcher.c | 246 ++++++++++++++++++ 13 files changed, 404 insertions(+), 9 deletions(-) create mode 100644 include/jemalloc/internal/batcher.h create mode 100644 src/batcher.c create mode 100644 test/unit/batcher.c diff --git a/Makefile.in b/Makefile.in index df244adb..ef6e1764 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,6 +98,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ + $(srcroot)src/batcher.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ @@ -204,6 +205,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ + $(srcroot)test/unit/batcher.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h new file mode 100644 index 00000000..a435f0b7 --- /dev/null +++ b/include/jemalloc/internal/batcher.h @@ -0,0 +1,44 @@ +#ifndef JEMALLOC_INTERNAL_BATCHER_H +#define JEMALLOC_INTERNAL_BATCHER_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" + +#define BATCHER_NO_IDX ((size_t)-1) + +typedef struct batcher_s batcher_t; +struct batcher_s { + /* + * Optimize for locality -- nelems_max and nelems are always touched + * 
togehter, along with the front of the mutex. The end of the mutex is + * only touched if there's contention. + */ + atomic_zu_t nelems; + size_t nelems_max; + malloc_mutex_t mtx; +}; + +void batcher_init(batcher_t *batcher, size_t nelems_max); + +/* + * Returns an index (into some user-owned array) to use for pushing, or + * BATCHER_NO_IDX if no index is free. If the former, the caller must call + * batcher_push_end once done. + */ +size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, + size_t elems_to_push); +void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); + +/* + * Returns the number of items to pop, or BATCHER_NO_IDX if there are none. + * If the former, must be followed by a call to batcher_pop_end. + */ +size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); + +void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); +void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); +void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher); + +#endif /* JEMALLOC_INTERNAL_BATCHER_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 937ca2d5..afee1246 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -64,9 +64,10 @@ enum witness_rank_e { WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, WITNESS_RANK_HOOK, + WITNESS_RANK_BIN, WITNESS_RANK_LEAF=0x1000, - WITNESS_RANK_BIN = WITNESS_RANK_LEAF, + WITNESS_RANK_BATCHER=WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 03c241ca..58bd7b3e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -378,4 +379,4 @@ - \ No newline at end of file + diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 5d23d8e2..6e59c035 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 8eaab36b..db06fc6d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index cd871379..01de0dcb 100644 --- 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/src/batcher.c b/src/batcher.c new file mode 100644 index 00000000..19af7d83 --- /dev/null +++ b/src/batcher.c @@ -0,0 +1,86 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/batcher.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/atomic.h" + +void +batcher_init(batcher_t *batcher, size_t nelems_max) { + atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); + batcher->nelems_max = nelems_max; + malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, + malloc_mutex_rank_exclusive); +} + +/* + * Returns an index (into some user-owned array) to use for pushing, or + * BATCHER_NO_IDX if no index is free. 
+ */ +size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, + size_t elems_to_push) { + assert(elems_to_push > 0); + size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + if (nelems_guess + elems_to_push > batcher->nelems_max) { + return BATCHER_NO_IDX; + } + malloc_mutex_lock(tsdn, &batcher->mtx); + size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + if (nelems + elems_to_push > batcher->nelems_max) { + malloc_mutex_unlock(tsdn, &batcher->mtx); + return BATCHER_NO_IDX; + } + assert(elems_to_push <= batcher->nelems_max - nelems); + /* + * We update nelems at push time (instead of during pop) so that other + * racing accesses of the batcher can fail fast instead of trying to + * acquire a mutex only to discover that there's no space for them. + */ + atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + return nelems; +} + +void +batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_assert_owner(tsdn, &batcher->mtx); + assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0); + malloc_mutex_unlock(tsdn, &batcher->mtx); +} + +size_t +batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { + size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + assert(nelems_guess <= batcher->nelems_max); + if (nelems_guess == 0) { + return BATCHER_NO_IDX; + } + malloc_mutex_lock(tsdn, &batcher->mtx); + size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + assert(nelems <= batcher->nelems_max); + if (nelems == 0) { + malloc_mutex_unlock(tsdn, &batcher->mtx); + return BATCHER_NO_IDX; + } + atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); + return nelems; +} + +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { + assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); + malloc_mutex_unlock(tsdn, &batcher->mtx); +} + +void +batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_prefork(tsdn, &batcher->mtx); +} + +void 
+batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_postfork_parent(tsdn, &batcher->mtx); +} + +void +batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_postfork_child(tsdn, &batcher->mtx); +} diff --git a/test/unit/batcher.c b/test/unit/batcher.c new file mode 100644 index 00000000..df9d3e5b --- /dev/null +++ b/test/unit/batcher.c @@ -0,0 +1,246 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/batcher.h" + +TEST_BEGIN(test_simple) { + enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; + batcher_t batcher; + size_t data[NELEMS_MAX]; + for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { + batcher_init(&batcher, nelems); + for (int run = 0; run < NRUNS; run++) { + for (int i = 0; i < NELEMS_MAX; i++) { + data[i] = (size_t)-1; + } + for (size_t i = 0; i < nelems; i++) { + size_t idx = batcher_push_begin(TSDN_NULL, + &batcher, 1); + assert_zu_eq(i, idx, "Wrong index"); + assert_zu_eq((size_t)-1, data[idx], + "Expected uninitialized slot"); + data[idx] = DATA_BASE_VAL + i; + batcher_push_end(TSDN_NULL, &batcher); + } + if (nelems > 0) { + size_t idx = batcher_push_begin(TSDN_NULL, + &batcher, 1); + assert_zu_eq(BATCHER_NO_IDX, idx, + "Shouldn't be able to push into a full " + "batcher"); + } + + size_t npop = batcher_pop_begin(TSDN_NULL, &batcher); + if (nelems == 0) { + assert_zu_eq(npop, BATCHER_NO_IDX, + "Shouldn't get any items out of an empty " + "batcher"); + } else { + assert_zu_eq(npop, nelems, + "Wrong number of elements popped"); + } + for (size_t i = 0; i < nelems; i++) { + assert_zu_eq(data[i], DATA_BASE_VAL + i, + "Item popped out of order!"); + } + if (nelems != 0) { + batcher_pop_end(TSDN_NULL, &batcher); + } + } + } +} +TEST_END + +TEST_BEGIN(test_multi_push) { + size_t idx, nelems; + batcher_t batcher; + batcher_init(&batcher, 11); + /* Push two at a time, 5 times, for 10 total. 
*/ + for (int i = 0; i < 5; i++) { + idx = batcher_push_begin(TSDN_NULL, &batcher, 2); + assert_zu_eq(2 * i, idx, "Should push in order"); + batcher_push_end(TSDN_NULL, &batcher); + } + /* Pushing two more should fail -- would put us at 12 elems. */ + idx = batcher_push_begin(TSDN_NULL, &batcher, 2); + assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space"); + /* But one more should work */ + idx = batcher_push_begin(TSDN_NULL, &batcher, 1); + assert_zu_eq(10, idx, "Should be out of space"); + batcher_push_end(TSDN_NULL, &batcher); + nelems = batcher_pop_begin(TSDN_NULL, &batcher); + batcher_pop_end(TSDN_NULL, &batcher); + assert_zu_eq(11, nelems, "Should have popped everything"); +} +TEST_END + +enum { + STRESS_TEST_ELEMS = 10, + STRESS_TEST_THREADS = 4, + STRESS_TEST_OPS = 1000 * 1000, + STRESS_TEST_PUSH_TO_POP_RATIO = 5, +}; + +typedef struct stress_test_data_s stress_test_data_t; +struct stress_test_data_s { + batcher_t batcher; + mtx_t pop_mtx; + atomic_u32_t thread_id; + + uint32_t elems_data[STRESS_TEST_ELEMS]; + size_t push_count[STRESS_TEST_ELEMS]; + size_t pop_count[STRESS_TEST_ELEMS]; + atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; + atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; +}; + +/* + * Note: 0-indexed. If one element is set and you want to find it, you call + * get_nth_set(elems, 0). + */ +static size_t +get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { + size_t ntrue = 0; + for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { + if (elems_owned[i]) { + ntrue++; + } + if (ntrue > n) { + return i; + } + } + assert_not_reached("Asked for the %zu'th set element when < %zu are " + "set", + n, n); + /* Just to silence a compiler warning. 
*/ + return 0; +} + +static void * +stress_test_thd(void *arg) { + stress_test_data_t *data = arg; + size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, + ATOMIC_RELAXED); + + size_t nelems_owned = 0; + bool elems_owned[STRESS_TEST_ELEMS] = {0}; + size_t local_push_count[STRESS_TEST_ELEMS] = {0}; + size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; + + for (int i = 0; i < STRESS_TEST_OPS; i++) { + size_t rnd = prng_range_zu(&prng, + STRESS_TEST_PUSH_TO_POP_RATIO); + if (rnd == 0 || nelems_owned == 0) { + size_t nelems = batcher_pop_begin(TSDN_NULL, + &data->batcher); + if (nelems == BATCHER_NO_IDX) { + continue; + } + for (size_t i = 0; i < nelems; i++) { + uint32_t elem = data->elems_data[i]; + assert_false(elems_owned[elem], + "Shouldn't already own what we just " + "popped"); + elems_owned[elem] = true; + nelems_owned++; + local_pop_count[elem]++; + data->pop_count[elem]++; + } + batcher_pop_end(TSDN_NULL, &data->batcher); + } else { + size_t elem_to_push_idx = prng_range_zu(&prng, + nelems_owned); + size_t elem = get_nth_set(elems_owned, + elem_to_push_idx); + assert_true( + elems_owned[elem], + "Should own element we're about to pop"); + elems_owned[elem] = false; + local_push_count[elem]++; + data->push_count[elem]++; + nelems_owned--; + size_t idx = batcher_push_begin(TSDN_NULL, + &data->batcher, 1); + assert_zu_ne(idx, BATCHER_NO_IDX, + "Batcher can't be full -- we have one of its " + "elems!"); + data->elems_data[idx] = (uint32_t)elem; + batcher_push_end(TSDN_NULL, &data->batcher); + } + } + + /* Push all local elems back, flush local counts to the shared ones. 
*/ + size_t push_idx = 0; + if (nelems_owned != 0) { + push_idx = batcher_push_begin(TSDN_NULL, &data->batcher, + nelems_owned); + assert_zu_ne(BATCHER_NO_IDX, push_idx, + "Should be space to push"); + } + for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { + if (elems_owned[i]) { + data->elems_data[push_idx] = (uint32_t)i; + push_idx++; + local_push_count[i]++; + data->push_count[i]++; + } + atomic_fetch_add_zu( + &data->atomic_push_count[i], local_push_count[i], + ATOMIC_RELAXED); + atomic_fetch_add_zu( + &data->atomic_pop_count[i], local_pop_count[i], + ATOMIC_RELAXED); + } + if (nelems_owned != 0) { + batcher_push_end(TSDN_NULL, &data->batcher); + } + + return NULL; +} + +TEST_BEGIN(test_stress) { + stress_test_data_t data; + batcher_init(&data.batcher, STRESS_TEST_ELEMS); + bool err = mtx_init(&data.pop_mtx); + assert_false(err, "mtx_init failure"); + atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED); + for (int i = 0; i < STRESS_TEST_ELEMS; i++) { + data.push_count[i] = 0; + data.pop_count[i] = 0; + atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED); + atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED); + + size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1); + assert_zu_eq(i, idx, "Should push in order"); + data.elems_data[idx] = i; + batcher_push_end(TSDN_NULL, &data.batcher); + } + + thd_t threads[STRESS_TEST_THREADS]; + for (int i = 0; i < STRESS_TEST_THREADS; i++) { + thd_create(&threads[i], stress_test_thd, &data); + } + for (int i = 0; i < STRESS_TEST_THREADS; i++) { + thd_join(threads[i], NULL); + } + for (int i = 0; i < STRESS_TEST_ELEMS; i++) { + assert_zu_ne(0, data.push_count[i], + "Should have done something!"); + assert_zu_eq(data.push_count[i], data.pop_count[i], + "every element should be pushed and popped an equal number " + "of times"); + assert_zu_eq(data.push_count[i], + atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED), + "atomic and non-atomic count should be equal given proper " + 
"synchronization"); + assert_zu_eq(data.pop_count[i], + atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED), + "atomic and non-atomic count should be equal given proper " + "synchronization"); + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_simple, test_multi_push, test_stress); +} From c085530c711fb233203963cd93dfa9339b0b9980 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 2 Feb 2024 13:20:14 -0800 Subject: [PATCH 178/395] Tcache batching: Plumbing In the next commit, we'll start using the batcher to eliminate mutex traffic. To avoid cluttering up that commit with the random bits of busy-work it entails, we'll centralize them here. This commit introduces: - A batched bin type. - The ability to mix batched and unbatched bins in the arena. - Conf parsing to set batches per size and a max batched size. - mallctl access to the corresponding opt-namespace keys. - Stats output of the above. --- include/jemalloc/internal/arena_inlines_b.h | 17 ++++++- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/bin.h | 22 +++++++-- include/jemalloc/internal/bin_info.h | 9 ++++ src/arena.c | 54 ++++++++++----------- src/bin.c | 18 +++++-- src/bin_info.c | 13 +++++ src/ctl.c | 7 +++ src/jemalloc.c | 10 ++++ src/stats.c | 2 + test/analyze/sizes.c | 2 + 11 files changed, 121 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a891b35c..18a72e7c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -604,10 +604,25 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +static inline bool +arena_bin_has_batch(szind_t binind) { + return binind < bin_info_nbatched_sizes; +} + static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - return shard0 + binshard; 
+ bin_t *ret; + if (arena_bin_has_batch(binind)) { + ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard); + } else { + ret = shard0 + binshard; + } + assert(binind >= SC_NBINS - 1 + || (uintptr_t)ret < (uintptr_t)arena + + arena_bin_offsets[binind + 1]); + + return ret; } #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 803ed25c..56e12f95 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -104,7 +104,7 @@ struct arena_s { JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_t all_bins[0]; + bin_with_batch_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index ed27c18f..36fce04f 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,12 +2,15 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/batcher.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" +#define BIN_REMOTE_FREE_ELEMS_MAX 16 + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -42,6 +45,19 @@ struct bin_s { edata_list_active_t slabs_full; }; +typedef struct bin_remote_free_data_s bin_remote_free_data_t; +struct bin_remote_free_data_s { + void *ptr; + edata_t *slab; +}; + +typedef struct bin_with_batch_s bin_with_batch_t; +struct bin_with_batch_s { + bin_t bin; + batcher_t remote_frees; + bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; +}; + /* A set of sharded bins of the same size class. 
*/ typedef struct bins_s bins_t; struct bins_s { @@ -57,9 +73,9 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, bool bin_init(bin_t *bin); /* Forking. */ -void bin_prefork(tsdn_t *tsdn, bin_t *bin); -void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); -void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch); /* Stats. */ static inline void diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index b6175550..f743b7d8 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -44,6 +44,15 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; +/* The maximum size a size class can be and still get batching behavior. */ +extern size_t opt_bin_info_max_batched_size; +/* The number of batches per batched size class. 
*/ +extern size_t opt_bin_info_remote_free_max_batch; + +extern szind_t bin_info_nbatched_sizes; +extern unsigned bin_info_nbatched_bins; +extern unsigned bin_info_nunbatched_bins; + extern bin_info_t bin_infos[SC_NBINS]; void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); diff --git a/src/arena.c b/src/arena.c index 1e5289e4..71ef26f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -45,7 +45,6 @@ size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; uint32_t arena_bin_offsets[SC_NBINS]; -static unsigned nbins_total; static unsigned huge_arena_ind; @@ -1672,7 +1671,6 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; - unsigned i; if (ind == 0) { base = b0get(); @@ -1685,15 +1683,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + - sizeof(bin_t) * nbins_total; + sizeof(bin_with_batch_t) * bin_info_nbatched_bins + + sizeof(bin_t) * bin_info_nunbatched_bins; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <= - (uintptr_t)arena + arena_size); - ) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); @@ -1733,12 +1728,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. 
*/ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bool err = bin_init(&arena->all_bins[i]); - ) - if (err) { - goto label_error; + for (unsigned i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bool err = bin_init(bin); + if (err) { + goto label_error; + } } } @@ -1882,8 +1878,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { ) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; - nbins_total += bin_infos[i].n_shards; - cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); + uint32_t bin_sz = (i < bin_info_nbatched_sizes + ? sizeof(bin_with_batch_t) : sizeof(bin_t)); + cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; } return pa_central_init(&arena_pa_central_global, base, hpa, &hpa_hooks_default); @@ -1933,19 +1930,21 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_prefork(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_prefork(tsdn, bin, arena_bin_has_batch(i)); + } } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_postfork_parent(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i)); + } } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -1982,10 +1981,11 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (unsigned i = 0; i < nbins_total; i++) { - 
JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_postfork_child(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_postfork_child(tsdn, bin, arena_bin_has_batch(i)); + } } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/bin.c b/src/bin.c index fa204587..b9b4be2c 100644 --- a/src/bin.c +++ b/src/bin.c @@ -54,16 +54,28 @@ bin_init(bin_t *bin) { } void -bin_prefork(tsdn_t *tsdn, bin_t *bin) { +bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_prefork(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_prefork(tsdn, &batched->remote_frees); + } } void -bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_parent(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_postfork_parent(tsdn, &batched->remote_frees); + } } void -bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { +bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_child(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_postfork_child(tsdn, &batched->remote_frees); + } } diff --git a/src/bin_info.c b/src/bin_info.c index 8629ef88..27f0be17 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,8 +3,15 @@ #include "jemalloc/internal/bin_info.h" +size_t opt_bin_info_max_batched_size; +size_t opt_bin_info_remote_free_max_batch; + bin_info_t bin_infos[SC_NBINS]; +szind_t bin_info_nbatched_sizes; +unsigned bin_info_nbatched_bins; +unsigned bin_info_nunbatched_bins; + static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { @@ -20,6 +27,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bitmap_info_t bitmap_info = 
BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; + if (bin_info->reg_size <= opt_bin_info_max_batched_size) { + bin_info_nbatched_sizes++; + bin_info_nbatched_bins += bin_info->n_shards; + } else { + bin_info_nunbatched_bins += bin_info->n_shards; + } } } diff --git a/src/ctl.c b/src/ctl.c index 3b90aa15..1b76b792 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -129,6 +129,8 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) +CTL_PROTO(opt_max_batched_size) +CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -480,6 +482,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), @@ -2203,6 +2207,9 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) +CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) +CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, + size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, diff --git a/src/jemalloc.c b/src/jemalloc.c index 77407714..8f40e0cc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1325,6 +1325,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size, + 
"max_batched_size", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, + "remote_free_max_batch", 0, + BIN_REMOTE_FREE_ELEMS_MAX, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( v, vlen); diff --git a/src/stats.c b/src/stats.c index 026a4f54..359a252c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1555,6 +1555,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") + OPT_WRITE_SIZE_T("max_batched_size") + OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index a48c4f48..cfb5ce51 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -35,6 +35,8 @@ main(void) { P(arena_t); P(arena_stats_t); P(base_t); + P(bin_t); + P(bin_with_batch_t); P(decay_t); P(edata_t); P(ecache_t); From 6e568488500b12441094e084f89b1a1da784f39b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Feb 2024 10:21:53 -0800 Subject: [PATCH 179/395] Tcache: Split up small/large handling. The main bits of shared code are the edata filtering and the stats flushing logic, both of which are fairly simple to read and not so painful to duplicate. The shared code comes at the cost of guarding all the subtle logic with `if (small)`, which doesn't feel worth it. 
--- src/tcache.c | 256 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 152 insertions(+), 104 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 2114ff95..8bec5d6c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -312,20 +312,9 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, } } -JEMALLOC_ALWAYS_INLINE bool -tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, - unsigned cur_binshard, bool small) { - if (small) { - return edata_arena_ind_get(edata) == cur_arena_ind - && edata_binshard_get(edata) == cur_binshard; - } else { - return edata_arena_ind_get(edata) == cur_arena_ind; - } -} - JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { +tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -333,11 +322,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ tsdn_t *tsdn = tsd_tsdn(tsd); - if (small) { - assert(binind < SC_NBINS); - } else { - assert(binind < tcache_nbins_get(tcache_slow)); - } + assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -368,34 +353,19 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, unsigned cur_arena_ind = edata_arena_ind_get(edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + unsigned cur_binshard = edata_binshard_get(edata); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, + cur_binshard); + assert(cur_binshard < bin_infos[binind].n_shards); /* - * These assignments are always overwritten when small is true, - * and their values are always ignored when small is false, but - * to avoid the technical UB 
when we pass them as parameters, we - * need to intialize them. + * If you're looking at profiles, you might think this + * is a good place to prefetch the bin stats, which are + * often a cache miss. This turns out not to be + * helpful on the workloads we've looked at, with moving + * the bin stats next to the lock seeming to do better. */ - unsigned cur_binshard = 0; - bin_t *cur_bin = NULL; - if (small) { - cur_binshard = edata_binshard_get(edata); - cur_bin = arena_get_bin(cur_arena, binind, - cur_binshard); - assert(cur_binshard < bin_infos[binind].n_shards); - /* - * If you're looking at profiles, you might think this - * is a good place to prefetch the bin stats, which are - * often a cache miss. This turns out not to be - * helpful on the workloads we've looked at, with moving - * the bin stats next to the lock seeming to do better. - */ - } - if (small) { - malloc_mutex_lock(tsdn, &cur_bin->lock); - } - if (!small && !arena_is_auto(cur_arena)) { - malloc_mutex_lock(tsdn, &cur_arena->large_mtx); - } + malloc_mutex_lock(tsdn, &cur_bin->lock); /* * If we acquired the right lock and have some stats to flush, @@ -404,53 +374,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (config_stats && tcache_arena == cur_arena && !merged_stats) { merged_stats = true; - if (small) { - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } else { - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } - } - - /* - * Large allocations need special prep done. Afterwards, we can - * drop the large lock. 
- */ - if (!small) { - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - - if (tcache_bin_flush_match(edata, cur_arena_ind, - cur_binshard, small)) { - large_dalloc_prep_locked(tsdn, - edata); - } - } - } - if (!small && !arena_is_auto(cur_arena)) { - malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } /* Deallocate whatever we can. */ unsigned ndeferred = 0; /* Init only to avoid used-uninitialized warning. */ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - if (small) { - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - } + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); for (unsigned i = 0; i < nflush; i++) { void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); - if (!tcache_bin_flush_match(edata, cur_arena_ind, - cur_binshard, small)) { + if (edata_arena_ind_get(edata) != cur_arena_ind + || edata_binshard_get(edata) != cur_binshard) { /* * The object was allocated either via a * different arena, or a different bin in this @@ -462,34 +402,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, ndeferred++; continue; } - if (small) { - if (arena_dalloc_bin_locked_step(tsdn, - cur_arena, cur_bin, &dalloc_bin_info, - binind, edata, ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; - } - } else { - if (large_dalloc_safety_checks(edata, ptr, - binind)) { - /* See the comment in isfree. 
*/ - continue; - } - large_dalloc_finish(tsdn, edata); + if (arena_dalloc_bin_locked_step(tsdn, + cur_arena, cur_bin, &dalloc_bin_info, + binind, edata, ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; } } - if (small) { - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - } + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); nflush = ndeferred; } /* Handle all deferred slab dalloc. */ - assert(small || dalloc_count == 0); for (unsigned i = 0; i < dalloc_count; i++) { edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); @@ -497,7 +426,6 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } if (config_stats && !merged_stats) { - if (small) { /* * The flush loop didn't happen to flush to this * thread's arena, so the stats didn't get merged. @@ -510,14 +438,132 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bin->stats.nrequests += cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; malloc_mutex_unlock(tsdn, &bin->lock); - } else { + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + + assert(binind < tcache_nbins_get(tcache_slow)); + arena_t *tcache_arena = tcache_slow->arena; + assert(tcache_arena != NULL); + + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). 
+ */ + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); + tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); + + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + bool merged_stats = false; + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. */ + edata_t *edata = item_edata[0].edata; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + if (!arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, cache_bin->tstats.nrequests); cache_bin->tstats.nrequests = 0; } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + for (unsigned i = 0; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + + if (edata_arena_ind_get(edata) == cur_arena_ind) { + large_dalloc_prep_locked(tsdn, + edata); + } + } + if (!arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + if (edata_arena_ind_get(edata) != cur_arena_ind) { + /* + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. 
+ */ + ptrs->ptr[ndeferred] = ptr; + item_edata[ndeferred].edata = edata; + ndeferred++; + continue; + } + if (large_dalloc_safety_checks(edata, ptr, binind)) { + /* See the comment in isfree. */ + continue; + } + large_dalloc_finish(tsdn, edata); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); + nflush = ndeferred; } + if (config_stats && !merged_stats) { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, + ptrs, nflush); + } else { + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, + ptrs, nflush); + } } JEMALLOC_ALWAYS_INLINE void @@ -556,13 +602,15 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, + /* small */ true); } void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, + /* small */ false); } /* From 44d91cf2434796188486960a07771709c15b0c2b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Feb 2024 13:18:39 -0800 Subject: [PATCH 180/395] Tcache flush: Partition by bin before locking. This accomplishes two things: - It avoids a full array scan (and any attendant branch prediction misses, etc.) while holding the bin lock. - It allows us to know the number of items that will be flushed before flushing them, which will (in an upcoming commit) let us know if it's safe to use the batched flush (in which case we won't acquire the bin mutex). --- src/tcache.c | 110 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 36 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 8bec5d6c..4dd5ccd6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -347,29 +347,85 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin * thread-local ones to, we do so under one critical section. */ bool merged_stats = false; - while (nflush > 0) { - /* Lock the arena, or bin, associated with the first object. 
*/ - edata_t *edata = item_edata[0].edata; - unsigned cur_arena_ind = edata_arena_ind_get(edata); + + /* + * We maintain the invariant that all edatas yet to be flushed are + * contained in the half-open range [flush_start, flush_end). We'll + * repeatedly partition the array so that the unflushed items are at the + * end. + */ + unsigned flush_start = 0; + + while (flush_start < nflush) { + /* + * After our partitioning step, all objects to flush will be in + * the half-open range [prev_flush_start, flush_start), and + * flush_start will be updated to correspond to the next loop + * iteration. + */ + unsigned prev_flush_start = flush_start; + + edata_t *cur_edata = item_edata[flush_start].edata; + unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - unsigned cur_binshard = edata_binshard_get(edata); + unsigned cur_binshard = edata_binshard_get(cur_edata); bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); + /* - * If you're looking at profiles, you might think this - * is a good place to prefetch the bin stats, which are - * often a cache miss. This turns out not to be - * helpful on the workloads we've looked at, with moving - * the bin stats next to the lock seeming to do better. + * Start off the partition; item_edata[i] always matches itself + * of course. */ + flush_start++; + for (unsigned i = flush_start; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + if (edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard) { + /* Swap the edatas. 
*/ + emap_batch_lookup_result_t temp_edata + = item_edata[flush_start]; + item_edata[flush_start] = item_edata[i]; + item_edata[i] = temp_edata; + /* Swap the pointers */ + void *temp_ptr = ptrs->ptr[flush_start]; + ptrs->ptr[flush_start] = ptrs->ptr[i]; + ptrs->ptr[i] = temp_ptr; + flush_start++; + } + } + /* Make sure we implemented partitioning correctly. */ + if (config_debug) { + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + edata_t *edata = item_edata[i].edata; + unsigned arena_ind = edata_arena_ind_get(edata); + assert(arena_ind == cur_arena_ind); + unsigned binshard = edata_binshard_get(edata); + assert(binshard == cur_binshard); + } + for (unsigned i = flush_start; i < nflush; i++) { + edata_t *edata = item_edata[i].edata; + assert(edata_arena_ind_get(edata) + != cur_arena_ind + || edata_binshard_get(edata) + != cur_binshard); + } + } + /* Actually do the flushing. */ malloc_mutex_lock(tsdn, &cur_bin->lock); - /* - * If we acquired the right lock and have some stats to flush, - * flush them. + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). */ if (config_stats && tcache_arena == cur_arena && !merged_stats) { @@ -379,29 +435,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; } - - /* Deallocate whatever we can. */ - unsigned ndeferred = 0; + /* Next flush objects. */ /* Init only to avoid used-uninitialized warning. 
*/ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = 0; i < nflush; i++) { + for (unsigned i = prev_flush_start; i < flush_start; i++) { void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - if (edata_arena_ind_get(edata) != cur_arena_ind - || edata_binshard_get(edata) != cur_binshard) { - /* - * The object was allocated either via a - * different arena, or a different bin in this - * arena. Either way, stash the object so that - * it can be handled in a future pass. - */ - ptrs->ptr[ndeferred] = ptr; - item_edata[ndeferred].edata = edata; - ndeferred++; - continue; - } + edata_t *edata = item_edata[i].edata; if (arena_dalloc_bin_locked_step(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, edata, ptr)) { @@ -409,13 +449,11 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin dalloc_count++; } } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); - - arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); - nflush = ndeferred; + arena_decay_ticks(tsdn, cur_arena, + flush_start - prev_flush_start); } /* Handle all deferred slab dalloc. */ From fc615739cbd15dcb4a60c611206d9b8817aab565 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 9 Feb 2024 16:08:45 -0800 Subject: [PATCH 181/395] Add batching to arena bins. This adds a fast-path for threads freeing a small number of allocations to bins which are not their "home-base" and which encounter lock contention in attempting to do so. In producer-consumer workflows, such small lock hold times can cause lock convoying that greatly increases overall bin mutex contention. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 132 +++++++++- include/jemalloc/internal/bin.h | 47 +++- include/jemalloc/internal/bin_info.h | 2 + src/arena.c | 38 ++- src/bin.c | 36 ++- src/bin_info.c | 15 +- src/ctl.c | 4 + src/jemalloc.c | 5 + src/stats.c | 1 + src/tcache.c | 173 ++++++++++--- test/include/test/fork.h | 32 +++ test/include/test/jemalloc_test.h.in | 5 + test/unit/bin_batching.c | 264 ++++++++++++++++++++ test/unit/bin_batching.sh | 10 + test/unit/fork.c | 35 +-- 16 files changed, 722 insertions(+), 78 deletions(-) create mode 100644 test/include/test/fork.h create mode 100644 test/unit/bin_batching.c create mode 100644 test/unit/bin_batching.sh diff --git a/Makefile.in b/Makefile.in index ef6e1764..94208f37 100644 --- a/Makefile.in +++ b/Makefile.in @@ -206,6 +206,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/batcher.c \ + $(srcroot)test/unit/bin_batching.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 18a72e7c..7f5f6bb0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -563,10 +563,11 @@ arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, * stats updates, which happen during finish (this lets running counts get left * in a register). 
*/ -JEMALLOC_ALWAYS_INLINE bool +JEMALLOC_ALWAYS_INLINE void arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { + void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, + unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(info, binind, slab, ptr); slab_data_t *slab_data = edata_slab_data_get(slab); @@ -586,12 +587,17 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (nfree == bin_info->nregs) { arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, bin); - return true; + + if (*dalloc_slabs_count < ndalloc_slabs) { + dalloc_slabs[*dalloc_slabs_count] = slab; + (*dalloc_slabs_count)++; + } else { + edata_list_active_append(dalloc_slabs_extra, slab); + } } else if (nfree == 1 && slab != bin->slabcur) { arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, bin); } - return false; } JEMALLOC_ALWAYS_INLINE void @@ -604,11 +610,129 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind, + edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count, + edata_list_active_t *dalloc_slabs_extra) { + assert(binind < bin_info_nbatched_sizes); + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + size_t nelems_to_pop = batcher_pop_begin(tsdn, + &batched_bin->remote_frees); + + bin_batching_test_mid_pop(nelems_to_pop); + if (nelems_to_pop == BATCHER_NO_IDX) { + malloc_mutex_assert_not_owner(tsdn, + &batched_bin->remote_frees.mtx); + return; + } else { + malloc_mutex_assert_owner(tsdn, + &batched_bin->remote_frees.mtx); + } + + bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; + for (size_t i = 0; i < 
nelems_to_pop; i++) { + remote_free_data[i] = batched_bin->remote_free_data[i]; + } + batcher_pop_end(tsdn, &batched_bin->remote_frees); + + for (size_t i = 0; i < nelems_to_pop; i++) { + arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info, + binind, remote_free_data[i].slab, remote_free_data[i].ptr, + dalloc_slabs, ndalloc_slabs, dalloc_count, + dalloc_slabs_extra); + } +} + +typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; +struct arena_bin_flush_batch_state_s { + arena_dalloc_bin_locked_info_t info; + + /* + * Bin batching is subtle in that there are unusual edge cases in which + * it can trigger the deallocation of more slabs than there were items + * flushed (say, if every original deallocation triggered a slab + * deallocation, and so did every batched one). So we keep a small + * backup array for any "extra" slabs, as well as a a list to allow a + * dynamic number of ones exceeding that array. + */ + edata_t *dalloc_slabs[8]; + unsigned dalloc_slab_count; + edata_list_active_t dalloc_slabs_extra; +}; + +JEMALLOC_ALWAYS_INLINE unsigned +arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) { + if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) { + return bin_batching_test_ndalloc_slabs_max; + } + return preallocated_slabs; +} + +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (binind >= bin_info_nbatched_sizes) { + return; + } + + arena_dalloc_bin_locked_begin(&state->info, binind); + state->dalloc_slab_count = 0; + edata_list_active_init(&state->dalloc_slabs_extra); + + unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs) + / sizeof(state->dalloc_slabs[0])); + unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); + + arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, + state->dalloc_slabs, ndalloc_slabs, + 
&state->dalloc_slab_count, &state->dalloc_slabs_extra); +} + +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (binind >= bin_info_nbatched_sizes) { + return; + } + + arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info); +} + static inline bool arena_bin_has_batch(szind_t binind) { return binind < bin_info_nbatched_sizes; } +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (!arena_bin_has_batch(binind)) { + return; + } + /* + * The initialization of dalloc_slabs_extra is guarded by an + * arena_bin_has_batch check higher up the stack. But the clang + * analyzer forgets this down the stack, triggering a spurious error + * reported here. + */ + JEMALLOC_CLANG_ANALYZER_SUPPRESS { + bin_batching_test_after_unlock(state->dalloc_slab_count, + edata_list_active_empty(&state->dalloc_slabs_extra)); + } + for (unsigned i = 0; i < state->dalloc_slab_count; i++) { + edata_t *slab = state->dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + while (!edata_list_active_empty(&state->dalloc_slabs_extra)) { + edata_t *slab = edata_list_active_first( + &state->dalloc_slabs_extra); + edata_list_active_remove(&state->dalloc_slabs_extra, slab); + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } +} + static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 36fce04f..5b776c17 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -11,6 +11,51 @@ #define BIN_REMOTE_FREE_ELEMS_MAX 16 +#ifdef JEMALLOC_JET +extern void (*bin_batching_test_after_push_hook)(size_t idx); +extern void 
(*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); +extern void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, + bool list_empty); +#endif + +#ifdef JEMALLOC_JET +extern unsigned bin_batching_test_ndalloc_slabs_max; +#else +static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; +#endif + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_after_push(size_t idx) { + (void)idx; +#ifdef JEMALLOC_JET + if (bin_batching_test_after_push_hook != NULL) { + bin_batching_test_after_push_hook(idx); + } +#endif +} + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_mid_pop(size_t elems_to_pop) { + (void)elems_to_pop; +#ifdef JEMALLOC_JET + if (bin_batching_test_mid_pop_hook != NULL) { + bin_batching_test_mid_pop_hook(elems_to_pop); + } +#endif +} + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { + (void)slab_dalloc_count; + (void)list_empty; +#ifdef JEMALLOC_JET + if (bin_batching_test_after_unlock_hook != NULL) { + bin_batching_test_after_unlock_hook(slab_dalloc_count, + list_empty); + } +#endif +} + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -70,7 +115,7 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); /* Initializes a bin to empty. Returns true on error. */ -bool bin_init(bin_t *bin); +bool bin_init(bin_t *bin, unsigned binind); /* Forking. */ void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index f743b7d8..88d58c91 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -48,6 +48,8 @@ struct bin_info_s { extern size_t opt_bin_info_max_batched_size; /* The number of batches per batched size class. 
*/ extern size_t opt_bin_info_remote_free_max_batch; +// The max number of pending elems (across all batches) +extern size_t opt_bin_info_remote_free_max; extern szind_t bin_info_nbatched_sizes; extern unsigned bin_info_nbatched_bins; diff --git a/src/arena.c b/src/arena.c index 71ef26f5..21010279 100644 --- a/src/arena.c +++ b/src/arena.c @@ -661,10 +661,17 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { } static void -arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + + if (arena_bin_has_batch(binind)) { + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + batcher_init(&batched_bin->remote_frees, + BIN_REMOTE_FREE_ELEMS_MAX); + } + if (bin->slabcur != NULL) { slab = bin->slabcur; bin->slabcur = NULL; @@ -815,7 +822,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j), + i); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -1080,8 +1088,18 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + /* + * This has some fields that are conditionally initialized down batch + * flush pathways. This can trigger static analysis warnings deeper + * down in the static. The accesses are guarded by the same checks as + * the initialization, but the analysis isn't able to track that across + * multiple stack frames. 
+ */ + arena_bin_flush_batch_state_t batch_flush_state + JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); + arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill) { /* Try batch-fill from slabcur first. */ @@ -1136,7 +1154,11 @@ label_refill: cache_bin->tstats.nrequests = 0; } + arena_bin_flush_batch_before_unlock(tsdn, arena, bin, binind, + &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); + arena_bin_flush_batch_after_unlock(tsdn, arena, bin, binind, + &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); @@ -1427,12 +1449,16 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; arena_dalloc_bin_locked_begin(&info, binind); - bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin, - &info, binind, edata, ptr); + edata_t *dalloc_slabs[1]; + unsigned dalloc_slabs_count = 0; + arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata, + ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count, + /* dalloc_slabs_extra */ NULL); arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); - if (ret) { + if (dalloc_slabs_count != 0) { + assert(dalloc_slabs[0] == edata); arena_slab_dalloc(tsdn, arena, edata); } } @@ -1731,7 +1757,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin); + bool err = bin_init(bin, i); if (err) { goto label_error; } diff --git a/src/bin.c b/src/bin.c index b9b4be2c..267aa0f3 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,6 +6,14 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" +#ifdef JEMALLOC_JET +unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; +void 
(*bin_batching_test_after_push_hook)(size_t push_idx); +void (*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); +void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, + bool list_empty); +#endif + bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -39,7 +47,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } bool -bin_init(bin_t *bin) { +bin_init(bin_t *bin, unsigned binind) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; @@ -50,6 +58,11 @@ bin_init(bin_t *bin) { if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } + if (arena_bin_has_batch(binind)) { + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + batcher_init(&batched_bin->remote_frees, + opt_bin_info_remote_free_max); + } return false; } @@ -57,8 +70,23 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_prefork(tsdn, &bin->lock); if (has_batch) { + /* + * The batch mutex has lower rank than the bin mutex (as it must + * -- it's acquired later). But during forking, we go + * bin-at-a-time, so that we acquire mutex on bin 0, then on + * the bin 0 batcher, then on bin 1. This is a safe ordering + * (it's ordered by the index of arenas and bins within those + * arenas), but will trigger witness errors that would + * otherwise force another level of arena forking that breaks + * bin encapsulation (because the witness API doesn't "know" + * about arena or bin ordering -- it just sees that the batcher + * has a lower rank than the bin). So instead we exclude the + * batcher mutex from witness checking during fork (which is + * the only time we touch multiple bins at once) by passing + * TSDN_NULL. 
+ */ bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_prefork(tsdn, &batched->remote_frees); + batcher_prefork(TSDN_NULL, &batched->remote_frees); } } @@ -67,7 +95,7 @@ bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_parent(tsdn, &bin->lock); if (has_batch) { bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_parent(tsdn, &batched->remote_frees); + batcher_postfork_parent(TSDN_NULL, &batched->remote_frees); } } @@ -76,6 +104,6 @@ bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_child(tsdn, &bin->lock); if (has_batch) { bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_child(tsdn, &batched->remote_frees); + batcher_postfork_child(TSDN_NULL, &batched->remote_frees); } } diff --git a/src/bin_info.c b/src/bin_info.c index 27f0be17..f8a64ae3 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,8 +3,19 @@ #include "jemalloc/internal/bin_info.h" -size_t opt_bin_info_max_batched_size; -size_t opt_bin_info_remote_free_max_batch; +/* + * We leave bin-batching disabled by default, with other settings chosen mostly + * empirically; across the test programs I looked at they provided the most bang + * for the buck. With other default settings, these choices for bin batching + * result in them consuming far less memory (even in the worst case) than the + * tcaches themselves, the arena, etc. + * Note that we always try to pop all bins on every arena cache bin lock + * operation, so the typical memory waste is far less than this (and only on + * hot bins, which tend to be large anyways). + */ +size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. 
*/ +size_t opt_bin_info_remote_free_max_batch = 4; +size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; bin_info_t bin_infos[SC_NBINS]; diff --git a/src/ctl.c b/src/ctl.c index 1b76b792..ab40050d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -130,6 +130,7 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_max_batched_size) +CTL_PROTO(opt_remote_free_max) CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) @@ -483,6 +484,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max"), CTL(opt_remote_free_max)}, {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, @@ -2208,6 +2210,8 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) +CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, + size_t) CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8f40e0cc..89f4b29d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1334,6 +1334,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, + "remote_free_max", 0, + BIN_REMOTE_FREE_ELEMS_MAX, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( diff --git a/src/stats.c b/src/stats.c index 359a252c..f057e722 100644 --- 
a/src/stats.c +++ b/src/stats.c @@ -1556,6 +1556,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_SIZE_T("max_batched_size") + OPT_WRITE_SIZE_T("remote_free_max") OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") diff --git a/src/tcache.c b/src/tcache.c index 4dd5ccd6..564b5d9c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -325,6 +325,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); + unsigned tcache_binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -341,6 +342,18 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + /* + * There's an edge case where we need to deallocate more slabs than we + * have elements of dalloc_slabs. This can happen if we end up deallocating + * items batched by another thread in addition to ones flushed from the + * cache. Since this is not very likely (most small object + * deallocations don't free up a whole slab), we don't want to burn the + * stack space to keep those excess slabs in an array. Instead we'll + * maintain an overflow list. + */ + edata_list_active_t dalloc_slabs_extra; + edata_list_active_init(&dalloc_slabs_extra); + /* * We're about to grab a bunch of locks. If one of them happens to be * the one guarding the arena-level stats counters we flush our @@ -418,40 +431,136 @@ } } - /* Actually do the flushing. */ - malloc_mutex_lock(tsdn, &cur_bin->lock); /* - * Flush stats first, if that was the right lock. 
Note that we - * don't actually have to flush stats into the current thread's - * binshard. Flushing into any binshard in the same arena is - * enough; we don't expose stats on per-binshard basis (just - * per-bin). + * We never batch when flushing to our home-base bin shard, + * since it's likely that we'll have to acquire that lock anyway + * when flushing stats. + * + * A plausible check we could add to can_batch is + * '&& arena_is_auto(cur_arena)'. The motivation would be that + * we have a higher tolerance for dubious user assumptions + * around non-auto arenas (e.g. "if I deallocate every object I + * allocated, and then call tcache.flush, then the arena stats + * must reflect zero live allocations"). + * + * This is dubious for a couple reasons: + * - We already don't provide perfect fidelity for stats + * counting (e.g. for profiled allocations, whose size can + * inflate in stats). + * - Hanging load-bearing guarantees around stats impedes + * scalability in general. + * + * There are some "complete" strategies we could do instead: + * - Add a arena..quiesce call to pop all bins for users who + * do want those stats accounted for. + * - Make batchability a user-controllable per-arena option. + * - Do a batch pop after every mutex acquisition for which we + * want to provide accurate stats. This gives perfectly + * accurate stats, but can cause weird performance effects + * (because doing stats collection can now result in slabs + * becoming empty, and therefore purging, large mutex + * acquisition, etc.). + * - Propagate the "why" behind a flush down to the level of the + * batcher, and include a batch pop attempt down full tcache + * flushing pathways. This is just a lot of plumbing and + * internal complexity. + * + * We don't do any of these right now, but the decision calculus + * and tradeoffs are subtle enough that the reasoning was worth + * leaving in this comment. 
*/ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; + bool bin_is_batched = arena_bin_has_batch(binind); + bool home_binshard = (cur_arena == tcache_arena + && cur_binshard == tcache_binshard); + bool can_batch = (flush_start - prev_flush_start + <= opt_bin_info_remote_free_max_batch) + && !home_binshard && bin_is_batched; + + /* + * We try to avoid the batching pathway if we can, so we always + * at least *try* to lock. + */ + bool locked = false; + bool batched = false; + if (can_batch) { + locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } - /* Next flush objects. */ - /* Init only to avoid used-uninitialized warning. */ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, - cur_arena, cur_bin, &dalloc_bin_info, - binind, edata, ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; + if (can_batch && !locked) { + bin_with_batch_t *batched_bin = + (bin_with_batch_t *)cur_bin; + size_t push_idx = batcher_push_begin(tsdn, + &batched_bin->remote_frees, + flush_start - prev_flush_start); + bin_batching_test_after_push(push_idx); + + if (push_idx != BATCHER_NO_IDX) { + batched = true; + unsigned nbatched + = flush_start - prev_flush_start; + for (unsigned i = 0; i < nbatched; i++) { + unsigned src_ind = prev_flush_start + i; + batched_bin->remote_free_data[ + push_idx + i].ptr + = ptrs->ptr[src_ind]; + batched_bin->remote_free_data[ + push_idx + i].slab + = item_edata[src_ind].edata; + } + batcher_push_end(tsdn, + &batched_bin->remote_frees); } } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); - malloc_mutex_unlock(tsdn, 
&cur_bin->lock); + if (!batched) { + if (!locked) { + malloc_mutex_lock(tsdn, &cur_bin->lock); + } + /* + * Flush stats first, if that was the right lock. Note + * that we don't actually have to flush stats into the + * current thread's binshard. Flushing into any binshard + * in the same arena is enough; we don't expose stats on + * per-binshard basis (just per-bin). + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; + } + unsigned preallocated_slabs = nflush; + unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); + + /* Next flush our own objects. */ + /* Init only to avoid used-uninitialized warning. */ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr, dalloc_slabs, ndalloc_slabs, + &dalloc_count, &dalloc_slabs_extra); + } + /* + * Lastly, flush any batched objects (from other + * threads). 
+ */ + if (bin_is_batched) { + arena_bin_flush_batch_impl(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, + dalloc_slabs, ndalloc_slabs, + &dalloc_count, &dalloc_slabs_extra); + } + + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + } arena_decay_ticks(tsdn, cur_arena, flush_start - prev_flush_start); } @@ -460,7 +569,11 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin for (unsigned i = 0; i < dalloc_count; i++) { edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - + } + while (!edata_list_active_empty(&dalloc_slabs_extra)) { + edata_t *slab = edata_list_active_first(&dalloc_slabs_extra); + edata_list_active_remove(&dalloc_slabs_extra, slab); + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); } if (config_stats && !merged_stats) { diff --git a/test/include/test/fork.h b/test/include/test/fork.h new file mode 100644 index 00000000..ac9b1858 --- /dev/null +++ b/test/include/test/fork.h @@ -0,0 +1,32 @@ +#ifndef JEMALLOC_TEST_FORK_H +#define JEMALLOC_TEST_FORK_H + +#ifndef _WIN32 + +#include + +static inline void +fork_wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} + +#endif + +#endif /* JEMALLOC_TEST_FORK_H */ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index f9c506da..8b139db1 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_TEST_H +#define JEMALLOC_TEST_H + #ifdef __cplusplus extern "C" { #endif @@ 
-172,3 +175,5 @@ extern "C" { #ifdef __cplusplus } #endif + +#endif diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c new file mode 100644 index 00000000..525f59e0 --- /dev/null +++ b/test/unit/bin_batching.c @@ -0,0 +1,264 @@ +#include "test/jemalloc_test.h" +#include "test/fork.h" + +enum { + STRESS_THREADS = 3, + STRESS_OBJECTS_PER_THREAD = 1000, + STRESS_ALLOC_SZ = PAGE / 2, +}; + +typedef struct stress_thread_data_s stress_thread_data_t; +struct stress_thread_data_s { + unsigned thd_id; + atomic_zu_t *ready_thds; + atomic_zu_t *done_thds; + void **to_dalloc; +}; + +static atomic_zu_t push_failure_count; +static atomic_zu_t pop_attempt_results[2]; +static atomic_zu_t dalloc_zero_slab_count; +static atomic_zu_t dalloc_nonzero_slab_count; +static atomic_zu_t dalloc_nonempty_list_count; + +static bool +should_skip() { + return + /* + * We do batching operations on tcache flush pathways; we can't if + * caching is disabled. + */ + !opt_tcache || + /* We rely on tcache fill/flush operations of the size we use. */ + opt_tcache_max < STRESS_ALLOC_SZ + /* + * Some of the races we want to trigger are fiddly enough that they + * only show up under real concurrency. We add 1 to account for the + * main thread, which also does some work. + */ + || ncpus < STRESS_THREADS + 1; +} + +static void +increment_push_failure(size_t push_idx) { + if (push_idx == BATCHER_NO_IDX) { + atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); + } else { + assert_zu_lt(push_idx, 4, "Only 4 elems"); + volatile int x = 10000; + while (--x) { + /* Spin for a while, to try to provoke a failure. 
*/ + } + } +} + +static void +increment_pop_attempt(size_t elems_to_pop) { + bool elems = (elems_to_pop != BATCHER_NO_IDX); + atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED); +} + +static void +increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { + if (slab_dalloc_count > 0) { + atomic_fetch_add_zu(&dalloc_nonzero_slab_count, 1, + ATOMIC_RELAXED); + } else { + atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, + ATOMIC_RELAXED); + } + if (!list_empty) { + atomic_fetch_add_zu(&dalloc_nonempty_list_count, 1, + ATOMIC_RELAXED); + } +} + +static void flush_tcache() { + assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + "Unexpected mallctl failure"); +} + +static void * +stress_thread(void *arg) { + stress_thread_data_t *data = arg; + uint64_t prng_state = data->thd_id; + atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); + while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) + != STRESS_THREADS) { + /* Spin */ + } + for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) { + dallocx(data->to_dalloc[i], 0); + if (prng_range_u64(&prng_state, 3) == 0) { + flush_tcache(); + } + + } + flush_tcache(); + atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); + return NULL; +} + +/* + * Run main_thread_fn in conditions that trigger all the various edge cases and + * subtle race conditions. 
+ */ +static void +stress_run(void (*main_thread_fn)(), int nruns) { + bin_batching_test_ndalloc_slabs_max = 1; + bin_batching_test_after_push_hook = &increment_push_failure; + bin_batching_test_mid_pop_hook = &increment_pop_attempt; + bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; + + atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); + + for (int run = 0; run < nruns; run++) { + thd_t thds[STRESS_THREADS]; + stress_thread_data_t thd_datas[STRESS_THREADS]; + atomic_zu_t ready_thds; + atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); + atomic_zu_t done_thds; + atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); + + void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD]; + for (int i = 0; i < STRESS_THREADS; i++) { + thd_datas[i].thd_id = i; + thd_datas[i].ready_thds = &ready_thds; + thd_datas[i].done_thds = &done_thds; + thd_datas[i].to_dalloc = ptrs[i]; + for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) { + void *ptr = mallocx(STRESS_ALLOC_SZ, 0); + assert_ptr_not_null(ptr, "alloc failure"); + ptrs[i][j] = ptr; + } + } + for (int i = 0; i < STRESS_THREADS; i++) { + thd_create(&thds[i], stress_thread, &thd_datas[i]); + } + while (atomic_load_zu(&done_thds, ATOMIC_RELAXED) + != STRESS_THREADS) { + main_thread_fn(); + } + for (int i = 0; i < STRESS_THREADS; i++) { + thd_join(thds[i], NULL); + } + } + + bin_batching_test_ndalloc_slabs_max = (unsigned)-1; + bin_batching_test_after_push_hook = NULL; + bin_batching_test_mid_pop_hook = NULL; + bin_batching_test_after_unlock_hook = NULL; +} + +static void +do_allocs_frees() { + enum {NALLOCS = 32}; + flush_tcache(); + void *ptrs[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0); + } + for (int i = 0; i < 
NALLOCS; i++) { + dallocx(ptrs[i], 0); + } + flush_tcache(); +} + +static void +test_arena_reset_main_fn() { + do_allocs_frees(); +} + +TEST_BEGIN(test_arena_reset) { + int err; + unsigned arena; + unsigned old_arena; + + test_skip_if(should_skip()); + test_skip_if(opt_percpu_arena != percpu_arena_disabled); + + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + assert_d_eq(0, err, "Arena creation failed"); + + err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz); + assert_d_eq(0, err, "changing arena failed"); + + stress_run(&test_arena_reset_main_fn, /* nruns */ 10); + + flush_tcache(); + + char buf[100]; + malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena); + err = mallctl(buf, NULL, NULL, NULL, 0); + assert_d_eq(0, err, "Couldn't change arena"); + + do_allocs_frees(); + + err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz); + assert_d_eq(0, err, "changing arena failed"); +} +TEST_END + +static void +test_fork_main_fn() { +#ifndef _WIN32 + pid_t pid = fork(); + if (pid == -1) { + test_fail("Fork failure!"); + } else if (pid == 0) { + /* Child */ + do_allocs_frees(); + _exit(0); + } else { + fork_wait_for_child_exit(pid); + do_allocs_frees(); + } +#endif +} + +TEST_BEGIN(test_fork) { +#ifdef _WIN32 + test_skip("No fork on windows"); +#endif + test_skip_if(should_skip()); + stress_run(&test_fork_main_fn, /* nruns */ 10); +} +TEST_END + +static void +test_races_main_fn() { + do_allocs_frees(); +} + +TEST_BEGIN(test_races) { + test_skip_if(should_skip()); + + stress_run(&test_races_main_fn, /* nruns */ 400); + + assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED), + "Should have seen some push failures"); + assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED), + "Should have seen some pop failures"); + assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED), + "Should have seen some pop successes"); + assert_zu_lt(0, 
atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), + "Expected some frees that didn't empty a slab"); + assert_zu_lt(0, atomic_load_zu(&dalloc_nonzero_slab_count, + ATOMIC_RELAXED), "expected some frees that emptied a slab"); + assert_zu_lt(0, atomic_load_zu(&dalloc_nonempty_list_count, + ATOMIC_RELAXED), "expected some frees that used the empty list"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_arena_reset, + test_races, + test_fork); +} diff --git a/test/unit/bin_batching.sh b/test/unit/bin_batching.sh new file mode 100644 index 00000000..fef9bdc6 --- /dev/null +++ b/test/unit/bin_batching.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +# This value of max_batched_size effectively requires all bins to be batched; +# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be +# at multi-gigabyte minimum page sizes. +# The reason for this sort of hacky approach is that we want to +# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" -> +# "empty" and "non-empty"-> "full" transitions often, which have special +# handling). But the value of PAGE isn't easily available in test scripts. 
+export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4" diff --git a/test/unit/fork.c b/test/unit/fork.c index 447eb191..1a4c575e 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,32 +1,5 @@ #include "test/jemalloc_test.h" - -#ifndef _WIN32 -#include -#endif - -#ifndef _WIN32 -static void -wait_for_child_exit(int pid) { - int status; - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure."); - } - if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail("Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } -} -#endif +#include "test/fork.h" TEST_BEGIN(test_fork) { #ifndef _WIN32 @@ -64,7 +37,7 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } #else test_skip("fork(2) is irrelevant to Windows"); @@ -87,7 +60,7 @@ do_fork_thd(void *arg) { test_fail("Exec failed"); } else { /* Parent */ - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } return NULL; } @@ -124,7 +97,7 @@ TEST_BEGIN(test_fork_multithreaded) { do_test_fork_multithreaded(); _exit(0); } else { - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } } #else From f9c0b5f7f8a917661db39289e38ec94d9d198f11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Feb 2024 14:54:43 -0800 Subject: [PATCH 182/395] Bin batching: add some stats. This lets us easily see what fraction of flush load is being taken up by the bins, and helps guide future optimization approaches (for example: should we prefetch during cache bin fills? It depends on how many objects the average fill pops out of the batch). 
--- include/jemalloc/internal/arena_inlines_b.h | 6 +++ include/jemalloc/internal/batcher.h | 2 + include/jemalloc/internal/bin.h | 5 +++ include/jemalloc/internal/bin_stats.h | 5 +++ src/batcher.c | 10 +++++ src/ctl.c | 30 ++++++++++++++ src/stats.c | 43 +++++++++++++++++++++ src/tcache.c | 13 +++++++ 8 files changed, 114 insertions(+) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7f5f6bb0..ea246cc5 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -630,6 +630,8 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, &batched_bin->remote_frees.mtx); } + size_t npushes = batcher_pop_get_pushes(tsdn, + &batched_bin->remote_frees); bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; for (size_t i = 0; i < nelems_to_pop; i++) { remote_free_data[i] = batched_bin->remote_free_data[i]; @@ -642,6 +644,10 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, dalloc_slabs, ndalloc_slabs, dalloc_count, dalloc_slabs_extra); } + + bin->stats.batch_pops++; + bin->stats.batch_pushes += npushes; + bin->stats.batch_pushed_elems += nelems_to_pop; } typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h index a435f0b7..40c8b35f 100644 --- a/include/jemalloc/internal/batcher.h +++ b/include/jemalloc/internal/batcher.h @@ -16,6 +16,7 @@ struct batcher_s { */ atomic_zu_t nelems; size_t nelems_max; + size_t npushes; malloc_mutex_t mtx; }; @@ -35,6 +36,7 @@ void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); * If the former, must be followed by a call to batcher_pop_end. 
*/ size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); +size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 5b776c17..c49afea6 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -138,6 +138,11 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; + + stats->batch_failed_pushes += bin->stats.batch_failed_pushes; + stats->batch_pushes += bin->stats.batch_pushes; + stats->batch_pushed_elems += bin->stats.batch_pushed_elems; + malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index f95b9e9c..334c166d 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,6 +48,11 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. */ size_t nonfull_slabs; + + uint64_t batch_pops; + uint64_t batch_failed_pushes; + uint64_t batch_pushes; + uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/src/batcher.c b/src/batcher.c index 19af7d83..2570b3a9 100644 --- a/src/batcher.c +++ b/src/batcher.c @@ -9,6 +9,7 @@ void batcher_init(batcher_t *batcher, size_t nelems_max) { atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); batcher->nelems_max = nelems_max; + batcher->npushes = 0; malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, malloc_mutex_rank_exclusive); } @@ -37,9 +38,18 @@ size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, * acquire a mutex only to discover that there's no space for them. 
*/ atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + batcher->npushes++; return nelems; } +size_t +batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_assert_owner(tsdn, &batcher->mtx); + size_t npushes = batcher->npushes; + batcher->npushes = 0; + return npushes; +} + void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { malloc_mutex_assert_owner(tsdn, &batcher->mtx); diff --git a/src/ctl.c b/src/ctl.c index ab40050d..09188dd9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -239,6 +239,10 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) +CTL_PROTO(stats_arenas_i_bins_j_batch_pops) +CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -694,6 +698,14 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, + {NAME("batch_pops"), + CTL(stats_arenas_i_bins_j_batch_pops)}, + {NAME("batch_failed_pushes"), + CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, + {NAME("batch_pushes"), + CTL(stats_arenas_i_bins_j_batch_pushes)}, + {NAME("batch_pushed_elems"), + CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -1246,6 +1258,16 @@ MUTEX_PROF_ARENA_MUTEXES assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } + + merged->batch_pops + += bstats->batch_pops; + merged->batch_failed_pushes + += bstats->batch_failed_pushes; + merged->batch_pushes + += bstats->batch_pushes; + merged->batch_pushed_elems + += 
bstats->batch_pushed_elems; + malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -3957,6 +3979,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/stats.c b/src/stats.c index f057e722..8419158a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -358,6 +358,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, pops, NULL, right, 10, uint64) + COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, failed_push, NULL, right, 13, uint64) + COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push, NULL, right, 7, uint64) + COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push_elem, NULL, right, 12, uint64) + COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) + /* Don't want to actually print the name. 
*/ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -405,6 +414,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; + uint64_t batch_pops, batch_failed_pushes, batch_pushes, + batch_pushed_elems; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -453,6 +464,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pops", &batch_pops, + uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", + &batch_failed_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", + &batch_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", + &batch_pushed_elems, uint64_t); + if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -487,6 +507,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); + emitter_json_kv(emitter, "batch_pops", + emitter_type_uint64, &batch_pops); + emitter_json_kv(emitter, "batch_failed_pushes", + emitter_type_uint64, &batch_failed_pushes); + emitter_json_kv(emitter, "batch_pushes", + emitter_type_uint64, &batch_pushes); + emitter_json_kv(emitter, "batch_pushed_elems", + emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, @@ -545,6 +573,21 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); + col_pops.uint64_val = batch_pops; + col_pops_ps.uint64_val + = rate_per_second(batch_pops, uptime); + + col_failed_push.uint64_val = batch_failed_pushes; + col_failed_push_ps.uint64_val + = 
rate_per_second(batch_failed_pushes, uptime); + col_push.uint64_val = batch_pushes; + col_push_ps.uint64_val + = rate_per_second(batch_pushes, uptime); + + col_push_elem.uint64_val = batch_pushed_elems; + col_push_elem_ps.uint64_val + = rate_per_second(batch_pushed_elems, uptime); + /* * Note that mutex columns were initialized above, if mutex == * true. diff --git a/src/tcache.c b/src/tcache.c index 564b5d9c..03ec5136 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -482,6 +482,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin */ bool locked = false; bool batched = false; + bool batch_failed = false; if (can_batch) { locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } @@ -508,12 +509,24 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } batcher_push_end(tsdn, &batched_bin->remote_frees); + } else { + batch_failed = true; } } if (!batched) { if (!locked) { malloc_mutex_lock(tsdn, &cur_bin->lock); } + /* + * Unlike other stats (which only ever get flushed into + * a tcache's associated arena), batch_failed counts get + * accumulated into the bin where the push attempt + * failed. + */ + if (config_stats && batch_failed) { + cur_bin->stats.batch_failed_pushes++; + } + /* * Flush stats first, if that was the right lock. 
Note * that we don't actually have to flush stats into the From 90c627edb70e081e1298b79010478d2f804467f1 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 31 May 2024 10:28:58 -0700 Subject: [PATCH 183/395] Export hugepage size with `arenas.hugepage` --- src/ctl.c | 3 +++ src/stats.c | 4 ++++ test/unit/mallctl.c | 1 + 3 files changed, 8 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 09188dd9..4347dd2b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -195,6 +195,7 @@ CTL_PROTO(arenas_dirty_decay_ms) CTL_PROTO(arenas_muzzy_decay_ms) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) +CTL_PROTO(arenas_hugepage) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) @@ -593,6 +594,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, + {NAME("hugepage"), CTL(arenas_hugepage)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, @@ -3284,6 +3286,7 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) +CTL_RO_NL_GEN(arenas_hugepage, HUGEPAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_tcache_nbins, unsigned) diff --git a/src/stats.c b/src/stats.c index 8419158a..4df0ae62 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1698,6 +1698,10 @@ stats_general_print(emitter_t *emitter) { CTL_GET("arenas.page", &sv, size_t); emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); + CTL_GET("arenas.hugepage", &sv, size_t); + emitter_kv(emitter, "hugepage", "Hugepage size", emitter_type_size, + &sv); + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { emitter_kv(emitter, "tcache_max", 
"Maximum thread-cached size class", emitter_type_size, &sv); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 9e5baff0..84cd3995 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -877,6 +877,7 @@ TEST_BEGIN(test_arenas_constants) { TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); + TEST_ARENAS_CONSTANT(size_t, hugepage, HUGEPAGE); TEST_ARENAS_CONSTANT(unsigned, nbins, SC_NBINS); TEST_ARENAS_CONSTANT(unsigned, nlextents, SC_NSIZES - SC_NBINS); From 91a6d230dba40ef2ef6e381b4c4fab5f5b0f6111 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 31 May 2024 06:35:48 -0700 Subject: [PATCH 184/395] Respect `hpa_min_purge_interval_ms` option Currently, hugepages aware allocator backend works together with classic one as a fallback for not yet supported allocations. When background threads are enabled wake up time for classic interfere with hpa as there were no checks inside hpa purging logic to check if we are not purging too frequently. If background thread is running and `hpa_should_purge` returns true, then we will purge, even if we purged less than hpa_min_purge_interval_ms ago. --- src/hpa.c | 10 ++++++ test/unit/hpa_background_thread.c | 52 ++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 6b1ae2ce..fe925ad4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -378,6 +378,16 @@ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * Make sure we respect purge interval setting and don't purge + * too frequently. 
+ */ + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } + hpdata_t *to_purge = psset_pick_purge(&shard->psset); if (to_purge == NULL) { return false; diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 774ccb4a..e4abb63b 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -83,7 +83,36 @@ wait_until_thread_is_enabled(unsigned arena_id) { } static void -expect_purging(unsigned arena_ind, bool expect_deferred) { +expect_purging(unsigned arena_ind) { + size_t empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); + + void *ptrs[2]; + ptrs[0] = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[1] = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "All pages should be active"); + + dallocx(ptrs[0], MALLOCX_TCACHE_NONE); + expect_true(empty_ndirty == 0 || empty_ndirty == 1, + "Unexpected extra dirty page count: %zu", empty_ndirty); + + /* + * Wait for at least hpa_min_purge_interval_ms to trigger purge on next + * deallocation. 
+ */ + sleep_for_background_thread_interval(); + + dallocx(ptrs[1], MALLOCX_TCACHE_NONE); + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "There are should be no dirty pages"); +} + +static void +expect_deferred_purging(unsigned arena_ind) { size_t empty_ndirty; empty_ndirty = get_empty_ndirty(arena_ind); @@ -103,20 +132,15 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { expect_zu_eq(0, empty_ndirty, "All pages should be active"); dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); - if (expect_deferred) { - expect_true(empty_ndirty == 0 || empty_ndirty == 1 || - opt_prof, "Unexpected extra dirty page count: %zu", - empty_ndirty); - } else { - assert_zu_eq(0, empty_ndirty, - "Saw dirty pages without deferred purging"); - } + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || + opt_prof, "Unexpected extra dirty page count: %zu", + empty_ndirty); if (empty_ndirty > 0) { observed_dirty_page = true; break; } } - expect_b_eq(expect_deferred, observed_dirty_page, ""); + expect_true(observed_dirty_page, ""); /* * Under high concurrency / heavy test load (e.g. using run_test.sh), @@ -125,7 +149,7 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { */ unsigned retry = 0; while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && - expect_deferred && (retry++ < 100)) { + (retry++ < 100)) { sleep_for_background_thread_interval(); } @@ -144,7 +168,7 @@ TEST_BEGIN(test_hpa_background_thread_purges) { * Our .sh sets dirty mult to 0, so all dirty pages should get purged * any time any thread frees. 
*/ - expect_purging(arena_ind, /* expect_deferred */ true); + expect_deferred_purging(arena_ind); } TEST_END @@ -158,11 +182,11 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { unsigned arena_ind = create_arena(); set_background_thread_enabled(false); - expect_purging(arena_ind, false); + expect_purging(arena_ind); set_background_thread_enabled(true); wait_until_thread_is_enabled(arena_ind); - expect_purging(arena_ind, true); + expect_deferred_purging(arena_ind); } TEST_END From 867c6dd7dc88adb0489b8b815dd70c68807325fc Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 7 Jun 2024 05:32:24 -0700 Subject: [PATCH 185/395] Option to guard `hpa_min_purge_interval_ms` fix Change in `hpa_min_purge_interval_ms` handling logic is not backward compatible as it might increase memory usage. Now this logic guarded by `hpa_strict_min_purge_interval` option. When `hpa_strict_min_purge_interval` is true, we will purge no more than `hpa_min_purge_interval_ms`. When `hpa_strict_min_purge_interval` is false, old purging logic behaviour is preserved. Long term strategy migrate all users of hpa to new logic and then delete `hpa_strict_min_purge_interval` option. --- include/jemalloc/internal/hpa_opts.h | 12 +++++++++++- src/ctl.c | 4 ++++ src/hpa.c | 10 ++++++---- src/jemalloc.c | 4 ++++ src/stats.c | 1 + test/unit/hpa_background_thread.sh | 2 +- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 6e58c86b..93add641 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -49,6 +49,14 @@ struct hpa_shard_opts_s { * Minimum amount of time between purges. */ uint64_t min_purge_interval_ms; + + /* + * Strictly respect minimum amout of time between purges. + * + * This is an option to provide backward compatibility for staged rollout of + * purging logic fix. 
+ */ + bool strict_min_purge_interval; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -69,7 +77,9 @@ struct hpa_shard_opts_s { /* hugify_delay_ms */ \ 10 * 1000, \ /* min_purge_interval_ms */ \ - 5 * 1000 \ + 5 * 1000, \ + /* strict_min_purge_interval */ \ + false \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 4347dd2b..62589d77 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,6 +103,7 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) +CTL_PROTO(opt_hpa_strict_min_purge_interval) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -459,6 +460,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, + {NAME("hpa_strict_min_purge_interval"), CTL(opt_hpa_strict_min_purge_interval)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2193,6 +2195,8 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, + opt_hpa_opts.strict_min_purge_interval, bool) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index fe925ad4..49d6b037 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -382,10 +382,12 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * Make sure we respect purge interval setting and don't purge * too frequently. 
*/ - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; + if (shard->opts.strict_min_purge_interval) { + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } } hpdata_t *to_purge = psset_pick_purge(&shard->psset); diff --git a/src/jemalloc.c b/src/jemalloc.c index 89f4b29d..abd7540f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1554,6 +1554,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_BOOL( + opt_hpa_opts.strict_min_purge_interval, + "hpa_strict_min_purge_interval"); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/stats.c b/src/stats.c index 4df0ae62..726007f5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,6 +1564,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") + OPT_WRITE_BOOL("hpa_strict_min_purge_interval") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 33b70e19..666da8fc 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_strict_min_purge_interval:true,hpa_sec_nshards:0" From 21bcc0a8d49ab2944ae53c7e43f5c84fc8a34322 Mon Sep 17 00:00:00 2001 From: Guangli Dai 
Date: Thu, 6 Jun 2024 11:14:40 -0700 Subject: [PATCH 186/395] Make JEMALLOC_CXX_THROW definition compatible with newer C++ versions --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index a0679af5..06f47b8a 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -52,7 +52,7 @@ #define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() +# define JEMALLOC_CXX_THROW noexcept (true) #else # define JEMALLOC_CXX_THROW #endif From 8477ec9562632b0808874416cb2d11ad6fbf99ea Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 21 Jun 2024 15:21:52 -0700 Subject: [PATCH 187/395] Set dependent as false for all rtree reads without ownership --- include/jemalloc/internal/emap.h | 4 ++-- src/emap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 08262f1f..7ac0ae95 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -186,13 +186,13 @@ emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { */ EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ false, /* init_missing */ false); if (elm == NULL) { return true; } rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true); + /* dependent */ false); if (contents.edata == NULL || contents.metadata.state == extent_state_active || edata_state_in_transition(contents.metadata.state)) { diff --git a/src/emap.c b/src/emap.c index 9cc95a72..f7d5c25a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -74,7 +74,7 @@ 
emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, } rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ true); + &emap->rtree, elm, /* dependent */ false); if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, expected_state, forward, expanding)) { return NULL; From a1fcbebb186b8909693a2f543030f94e5f448f5f Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 23 May 2024 12:19:57 -0700 Subject: [PATCH 188/395] skip tcache GC for tcache_max unit test --- test/unit/tcache_max.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 4480d733..0de75e4b 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024" +export MALLOC_CONF="tcache_max:1024,tcache_gc_incr_bytes:939524096" From c893fcd169fffca1b9d3156c6637a197765b82d0 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Mon, 10 Jun 2024 15:03:23 -0700 Subject: [PATCH 189/395] Change macOS mmap tag to fix conflict with CoreMedia MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tag 101 is assigned to "CoreMedia Capture Data", which makes for confusing output when debugging. To avoid conflicts, use a tag in the reserved application-specific range from 240–255 (inclusive). 
All assigned tags: https://github.com/apple-oss-distributions/xnu/blob/94d3b452840153a99b38a3a9659680b2a006908e/osfmk/mach/vm_statistics.h#L773-L775 --- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 8cf2fd9f..981aae9b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -17,7 +17,7 @@ #include /* ilog2 */ #endif #ifdef JEMALLOC_HAVE_VM_MAKE_TAG -#define PAGES_FD_TAG VM_MAKE_TAG(101U) +#define PAGES_FD_TAG VM_MAKE_TAG(254U) #else #define PAGES_FD_TAG -1 #endif From b66f689764e05084f5b995bf2f8d277b70e084fd Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 18 Jul 2024 07:58:51 -0700 Subject: [PATCH 190/395] Emit long string values without truncation There are few long options (`bin_shards` and `slab_sizes` for example) when they are specified and we emit statistics value gets truncated. Moved emitting logic for strings into separate `emitter_emit_str` function. It will try to emit string same way as before and if value is too long will fallback emiting rest partially with chunks of `BUF_SIZE`. Justification for long strings (longer than `BUF_SIZE`) is not supported. 
--- include/jemalloc/internal/emitter.h | 44 ++++++++++++++++------ test/unit/emitter.c | 58 +++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index bc12fe92..11153254 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -118,6 +118,37 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, return out_fmt; } +static inline void +emitter_emit_str(emitter_t *emitter, emitter_justify_t justify, int width, + char *fmt, size_t fmt_size, const char *str) { +#define BUF_SIZE 256 + char buf[BUF_SIZE]; + size_t str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", str); + emitter_printf(emitter, + emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + if (str_written < BUF_SIZE) { + return; + } + /* + * There is no support for long string justification at the moment as + * we output them partially with multiple malloc_snprintf calls and + * justufication will work correctly only withing one call. + * Fortunately this is not a big concern as we don't use justufication + * with long strings right now. + * + * We emitted leading quotation mark and trailing '\0', hence need to + * exclude extra characters from str shift. + */ + str += BUF_SIZE - 2; + do { + str_written = malloc_snprintf(buf, BUF_SIZE, "%s\"", str); + str += str_written >= BUF_SIZE ? BUF_SIZE - 1 : str_written; + emitter_printf(emitter, + emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + } while (str_written >= BUF_SIZE); +#undef BUF_SIZE +} + /* * Internal. 
Emit the given value type in the relevant encoding (so that the * bool true gets mapped to json "true", but the string "true" gets mapped to @@ -128,8 +159,6 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, static inline void emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, emitter_type_t value_type, const void *value) { - size_t str_written; -#define BUF_SIZE 256 #define FMT_SIZE 10 /* * We dynamically generate a format string to emit, to let us use the @@ -138,7 +167,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * cases. */ char fmt[FMT_SIZE]; - char buf[BUF_SIZE]; #define EMIT_SIMPLE(type, format) \ emitter_printf(emitter, \ @@ -167,15 +195,8 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, EMIT_SIMPLE(size_t, "%zu") break; case emitter_type_string: - str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + emitter_emit_str(emitter, justify, width, fmt, FMT_SIZE, *(const char *const *)value); - /* - * We control the strings we output; we shouldn't get anything - * anywhere near the fmt size. 
- */ - assert(str_written < BUF_SIZE); - emitter_printf(emitter, - emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: EMIT_SIMPLE(uint32_t, "%" FMTu32) @@ -189,7 +210,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, default: unreachable(); } -#undef BUF_SIZE #undef FMT_SIZE } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index ef8f9ff5..af0da90d 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -222,6 +222,17 @@ emit_types(emitter_t *emitter) { ssize_t zd = -456; size_t zu = 456; const char *str = "string"; + const char *long_str = + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz"; uint32_t u32 = 789; uint64_t u64 = 10000000000ULL; @@ -232,8 +243,9 @@ emit_types(emitter_t *emitter) { emitter_kv(emitter, "k4", "K4", emitter_type_ssize, &zd); emitter_kv(emitter, "k5", "K5", emitter_type_size, &zu); emitter_kv(emitter, "k6", "K6", emitter_type_string, &str); - emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32); - emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64); + emitter_kv(emitter, "k7", "K7", emitter_type_string, &long_str); + emitter_kv(emitter, "k8", "K8", emitter_type_uint32, &u32); + emitter_kv(emitter, "k9", "K9", emitter_type_uint64, &u64); /* * We don't test the title type, since it's only used for tables. It's * tested in the emitter_table_row tests. 
@@ -249,8 +261,18 @@ static const char *types_json = "\t\"k4\": -456,\n" "\t\"k5\": 456,\n" "\t\"k6\": \"string\",\n" -"\t\"k7\": 789,\n" -"\t\"k8\": 10000000000\n" +"\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\",\n" +"\t\"k8\": 789,\n" +"\t\"k9\": 10000000000\n" "}\n"; static const char *types_json_compact = "{" @@ -260,8 +282,18 @@ static const char *types_json_compact = "\"k4\":-456," "\"k5\":456," "\"k6\":\"string\"," - "\"k7\":789," - "\"k8\":10000000000" + "\"k7\":\"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"," + "\"k8\":789," + "\"k9\":10000000000" "}"; static const char *types_table = "K1: false\n" @@ -270,8 +302,18 @@ static const char *types_table = "K4: -456\n" "K5: 456\n" "K6: \"string\"\n" -"K7: 789\n" -"K8: 10000000000\n"; +"K7: \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"\n" +"K8: 789\n" +"K9: 10000000000\n"; static void emit_modal(emitter_t *emitter) { From bc32ddff2da6e58df90b1762f17519a2c04b26b0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Mon, 29 Jul 2024 11:28:41 -0700 Subject: [PATCH 191/395] Add usize to prof_sample_hook_t --- include/jemalloc/internal/prof_hook.h | 4 ++-- src/prof.c | 2 +- test/unit/prof_hook.c | 9 
+++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 3c5ff8bf..087dadc6 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -20,8 +20,8 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); */ typedef void (*prof_dump_hook_t)(const char *filename); -/* ptr, size, backtrace vector, backtrace vector length */ -typedef void (*prof_sample_hook_t)(const void *, size_t, void **, unsigned); +/* ptr, size, backtrace vector, backtrace vector length, usize */ +typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrace, unsigned backtrace_length, size_t usize); /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); diff --git a/src/prof.c b/src/prof.c index 6ae7f768..8fdc6f71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -166,7 +166,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, if (prof_sample_hook != NULL) { prof_bt_t *bt = &tctx->gctx->bt; pre_reentrancy(tsd, NULL); - prof_sample_hook(ptr, size, bt->vec, bt->len); + prof_sample_hook(ptr, size, bt->vec, bt->len, usize); post_reentrancy(tsd); } } diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index a48b237b..fd2871e5 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -16,6 +16,7 @@ bool mock_prof_sample_free_hook_called = false; void *sampled_ptr = NULL; size_t sampled_ptr_sz = 0; +size_t sampled_ptr_usz = 0; void *free_sampled_ptr = NULL; size_t free_sampled_ptr_sz = 0; @@ -60,10 +61,11 @@ mock_dump_hook(const char *filename) { } void -mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len) { +mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len, size_t usz) { mock_prof_sample_hook_called = true; sampled_ptr = (void *)ptr; sampled_ptr_sz = sz; + sampled_ptr_usz = usz; for (unsigned i = 0; i < len; i++) { 
expect_ptr_not_null((void **)vec[i], "Backtrace should not contain NULL"); @@ -244,6 +246,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { "Should not have called prof_sample_free hook"); expect_ptr_null(sampled_ptr, "Unexpected sampled ptr"); expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); + expect_zu_eq(sampled_ptr_usz, 0, "Unexpected sampled ptr usize"); expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); expect_zu_eq(free_sampled_ptr_sz, 0, "Unexpected free sampled ptr size"); @@ -258,6 +261,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { "Unexpected non NULL default hook"); size_t alloc_sz = 10; + size_t alloc_usz = 16; void *p = mallocx(alloc_sz, 0); expect_ptr_not_null(p, "Failed to allocate"); expect_true(mock_prof_sample_hook_called == sample_hook_set, @@ -266,6 +270,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); expect_zu_eq(alloc_sz, sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq(alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); } dallocx(p, 0); @@ -278,7 +283,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { } sampled_ptr = free_sampled_ptr = NULL; - sampled_ptr_sz = free_sampled_ptr_sz = 0; + sampled_ptr_sz = sampled_ptr_usz = free_sampled_ptr_sz = 0; mock_prof_sample_hook_called = false; mock_prof_sample_free_hook_called = false; } From 8dc97b11089be6d58a52009ea3da610bf90331d3 Mon Sep 17 00:00:00 2001 From: Burton Li Date: Mon, 1 Jul 2024 15:49:58 -0700 Subject: [PATCH 192/395] Fix NSTIME_MONOTONIC for win32 implementation --- src/nstime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nstime.c b/src/nstime.c index 7fb9100e..72f04227 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -172,7 +172,7 @@ nstime_ns_since(const nstime_t *past) { } #ifdef _WIN32 -# define NSTIME_MONOTONIC true +# define 
NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { FILETIME ft; From 48f66cf4a22af3b380d4c049f79fb7e820eba3d3 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 15:36:08 -0700 Subject: [PATCH 193/395] add a size check when declare a stack array to be less than 2048 bytes --- include/jemalloc/internal/jemalloc_internal_types.h | 8 ++++++-- src/ctl.c | 4 ++-- src/stats.c | 2 +- test/unit/hash.c | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 4ab5a0cf..6a81f3cd 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -135,10 +135,14 @@ typedef enum malloc_init_e malloc_init_t; # include # endif # endif -# define VARIABLE_ARRAY(type, name, count) \ +# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ type *name = alloca(sizeof(type) * (count)) #else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] +# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] #endif +#define VARIABLE_ARRAY_SIZE_MAX 2048 +#define VARIABLE_ARRAY(type, name, count) \ + assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ + VARIABLE_ARRAY_UNSAFE(type, name, count) #endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 62589d77..ebe5c61c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1379,7 +1379,7 @@ ctl_refresh(tsdn_t *tsdn) { const unsigned narenas = ctl_arenas->narenas; assert(narenas > 0); ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas); /* * Clear sum stats, since they will be merged into by @@ -2726,7 +2726,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { */ if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == narenas) { unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + 
VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); diff --git a/src/stats.c b/src/stats.c index 726007f5..fbfacabf 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1896,7 +1896,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t mib[3]; size_t miblen = sizeof(mib) / sizeof(size_t); size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); + VARIABLE_ARRAY_UNSAFE(bool, initialized, narenas); bool destroyed_initialized; unsigned i, ninitialized; diff --git a/test/unit/hash.c b/test/unit/hash.c index 49f08238..17c66ec6 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -61,8 +61,8 @@ static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; const int hashes_size = hashbytes * 256; - VARIABLE_ARRAY(uint8_t, hashes, hashes_size); - VARIABLE_ARRAY(uint8_t, final, hashbytes); + VARIABLE_ARRAY_UNSAFE(uint8_t, hashes, hashes_size); + VARIABLE_ARRAY_UNSAFE(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; From 47c9bcd402110be3f64517ad9366d1cfaa751d48 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 17:33:07 -0700 Subject: [PATCH 194/395] Use a for-loop to fulfill flush requests that are larger than CACHE_BIN_NFLUSH_BATCH_MAX items --- include/jemalloc/internal/cache_bin.h | 8 +++++ src/tcache.c | 47 ++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a26c3671..a7a5e40e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -600,6 +600,14 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, */ } +/* + * Limit how many items can be flushed in a batch (Which is the upper bound + * for the nflush parameter in tcache_bin_flush_impl()). 
+ * This is to avoid stack overflow when we do batch edata look up, which + * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. + */ +#define CACHE_BIN_NFLUSH_BATCH_MAX (VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) + /* * Filling and flushing are done in batch, on arrays of void *s. For filling, * the arrays go forward, and can be accessed with ordinary array arithmetic. diff --git a/src/tcache.c b/src/tcache.c index 03ec5136..4144524d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -712,22 +712,37 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. 
- */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } + assert(ptrs != NULL && ptrs->ptr != NULL); + unsigned nflush_batch, nflushed = 0; + cache_bin_ptr_array_t ptrs_batch; + do { + nflush_batch = nflush - nflushed; + if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { + nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; + } + assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); + (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; + (&ptrs_batch)->ptr = ptrs->ptr + nflushed; + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } else { + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } + nflushed += nflush_batch; + } while (nflushed < nflush); + assert(nflush == nflushed); + assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); } JEMALLOC_ALWAYS_INLINE void From 8fefabd3a49d1f090fe677722f1e2a66f162237a Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 17:49:21 -0700 Subject: [PATCH 195/395] increase the ncached_max in fill_flush test case to 1024 --- test/stress/fill_flush.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index a2db044d..546bcc0b 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -5,6 +5,7 @@ #define LARGE_ALLOC_SIZE SC_LARGE_MINCLASS #define NALLOCS 1000 +const char *malloc_conf = "tcache_ncached_max:8-128:1024"; /* * We make this volatile so the 1-at-a-time variants can't leave the allocation * in a register, just to try to get the cache behavior closer. From a25b9b8ba91881964be3083db349991bbbbf1661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 24 Jul 2024 21:16:11 +0000 Subject: [PATCH 196/395] Simplify the logic when bumping lg_fill_div. --- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 4144524d..b90907ad 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -166,7 +166,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * the fill count is always at least 1. 
 */
	if ((cache_bin_ncached_max_get(cache_bin) >>
-	    (tcache_slow->lg_fill_div[szind] + 1)) >= 1) {
+	    tcache_slow->lg_fill_div[szind]) > 1) {
 		tcache_slow->lg_fill_div[szind]++;
 	}
 }
From 0a9f51d0d8d2a8135cc853be7ed771230854ede6 Mon Sep 17 00:00:00 2001
From: Dmitry Ilvokhin
Date: Fri, 2 Aug 2024 09:35:44 -0700
Subject: [PATCH 197/395] Simplify `hpa_shard_maybe_do_deferred_work`

It doesn't make much sense to repeat purging once we are done with
hugification, because we can de-hugify pages that were hugified just a
moment ago for no good reason. Let them wait for the next deferred work
phase instead. And if they still meet purging conditions then, purge
them.
---
 src/hpa.c | 50 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/hpa.c b/src/hpa.c
index 49d6b037..27fc1589 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -532,41 +532,41 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
 	if (!forced && shard->opts.deferral_allowed) {
 		return;
 	}
+
 	/*
 	 * If we're on a background thread, do work so long as there's work to
 	 * be done. Otherwise, bound latency to not be *too* bad by doing at
 	 * most a small fixed number of operations.
 	 */
-	bool hugified = false;
-	bool purged = false;
 	size_t max_ops = (forced ? (size_t)-1 : 16);
 	size_t nops = 0;
-	do {
-		/*
-		 * Always purge before hugifying, to make sure we get some
-		 * ability to hit our quiescence targets.
-		 */
-		purged = false;
-		while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
-			purged = hpa_try_purge(tsdn, shard);
-			if (!purged) {
-				/*
-				 * It is fine if we couldn't purge as sometimes
-				 * we try to purge just to unblock
-				 * hugification, but there is maybe no dirty
-				 * pages at all at the moment.
-				 */
-				break;
-			}
-			nops++;
-		}
-		hugified = hpa_try_hugify(tsdn, shard);
-		if (hugified) {
-			nops++;
+
+	/*
+	 * Always purge before hugifying, to make sure we get some
+	 * ability to hit our quiescence targets.
+ */ + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + if (!hpa_try_purge(tsdn, shard)) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. + */ + break; } malloc_mutex_assert_owner(tsdn, &shard->mtx); + nops++; + } + + /* + * Try to hugify at least once, even if we out of operations to make at + * least some progress on hugification even at worst case. + */ + while (hpa_try_hugify(tsdn, shard) && nops < max_ops) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - } while ((hugified || purged) && nops < max_ops); + nops++; + } } static edata_t * From 143f458188d2d5a02418e7f72e56152dab118786 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 6 Aug 2024 08:37:04 -0700 Subject: [PATCH 198/395] Fix `hpa_strict_min_purge_interval` option logic We update `shard->last_purge` on each call of `hpa_try_purge` if we purged something. This means, when `hpa_strict_min_purge_interval` option is set only one slab will be purged, because on the next call condition for too frequent purge protection `since_last_purge_ms < shard->opts.min_purge_interval_ms` will always be true. This is not an intended behaviour. Instead, we need to check `min_purge_interval_ms` once and purge as many pages as needed to satisfy requirements for `hpa_dirty_mult` option. Make possible to count number of actions performed in unit tests (purge, hugify, dehugify) instead of binary: called/not called. Extended current unit tests with cases where we need to purge more than one page for a purge phase. 
--- src/hpa.c | 54 +++++++------ test/unit/hpa.c | 197 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 209 insertions(+), 42 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 27fc1589..d3b9c6c2 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -378,18 +378,6 @@ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - /* - * Make sure we respect purge interval setting and don't purge - * too frequently. - */ - if (shard->opts.strict_min_purge_interval) { - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; - } - } - hpdata_t *to_purge = psset_pick_purge(&shard->psset); if (to_purge == NULL) { return false; @@ -521,6 +509,19 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { return true; } +static bool +hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (shard->opts.strict_min_purge_interval) { + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } + } + return true; +} + /* * Execution of deferred work is forced if it's triggered by an explicit * hpa_shard_do_deferred_work() call. @@ -545,18 +546,25 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * Always purge before hugifying, to make sure we get some * ability to hit our quiescence targets. */ - while (hpa_should_purge(tsdn, shard) && nops < max_ops) { - if (!hpa_try_purge(tsdn, shard)) { - /* - * It is fine if we couldn't purge as sometimes - * we try to purge just to unblock - * hugification, but there is maybe no dirty - * pages at all at the moment. - */ - break; + + /* + * Make sure we respect purge interval setting and don't purge + * too frequently. 
+ */ + if (hpa_min_purge_interval_passed(tsdn, shard)) { + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + if (!hpa_try_purge(tsdn, shard)) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. + */ + break; + } + malloc_mutex_assert_owner(tsdn, &shard->mtx); + nops++; } - malloc_mutex_assert_owner(tsdn, &shard->mtx); - nops++; } /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a8a26e13..2c11e0a8 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,6 +34,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, + /* strict_min_purge_interval */ + false }; static hpa_shard_opts_t test_hpa_shard_opts_purge = { @@ -49,6 +51,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* min_purge_interval_ms */ 5 * 1000, + /* strict_min_purge_interval */ + false }; static hpa_shard_t * @@ -358,24 +362,24 @@ defer_test_unmap(void *ptr, size_t size) { (void)size; } -static bool defer_purge_called = false; +static size_t ndefer_purge_calls = 0; static void defer_test_purge(void *ptr, size_t size) { (void)ptr; (void)size; - defer_purge_called = true; + ++ndefer_purge_calls; } -static bool defer_hugify_called = false; +static size_t ndefer_hugify_calls = 0; static void defer_test_hugify(void *ptr, size_t size) { - defer_hugify_called = true; + ++ndefer_hugify_calls; } -static bool defer_dehugify_called = false; +static size_t ndefer_dehugify_calls = 0; static void defer_test_dehugify(void *ptr, size_t size) { - defer_dehugify_called = true; + ++ndefer_dehugify_calls; } static nstime_t defer_curtime; @@ -417,14 +421,14 @@ TEST_BEGIN(test_defer_time) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); - expect_false(defer_hugify_called, "Hugified too early"); + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); /* Hugification 
delay is set to 10 seconds in options. */ nstime_init2(&defer_curtime, 11, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_true(defer_hugify_called, "Failed to hugify"); + expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); - defer_hugify_called = false; + ndefer_hugify_calls = 0; /* Purge. Recall that dirty_mult is .25. */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { @@ -434,12 +438,12 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); - expect_false(defer_hugify_called, "Hugified too early"); - expect_true(defer_dehugify_called, "Should have dehugified"); - expect_true(defer_purge_called, "Should have purged"); - defer_hugify_called = false; - defer_dehugify_called = false; - defer_purge_called = false; + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); + expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; /* * Refill the page. We now meet the hugification threshold; we should @@ -459,9 +463,10 @@ TEST_BEGIN(test_defer_time) { /* Wait for the threshold again. 
*/ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_true(defer_hugify_called, "Hugified too early"); - expect_false(defer_dehugify_called, "Unexpected dehugify"); - expect_false(defer_purge_called, "Unexpected purge"); + expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); + expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); + expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); + ndefer_hugify_calls = 0; destroy_test_data(shard); } @@ -497,6 +502,157 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END +TEST_BEGIN(test_strict_no_min_purge_interval) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. 
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
+	ndefer_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_strict_min_purge_interval) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.strict_min_purge_interval = true;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+	    false, false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected null edata");
+	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * We have a slab with dirty page and no active pages, but
+	 * opt.min_purge_interval_ms didn't pass yet.
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+
+	/* Minimum purge interval is set to 5 seconds in options. */
+	nstime_init2(&defer_curtime, 6, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/* Now we should purge, but nothing else.
*/ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_purge) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 3 hugepages out of 8. */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still + * have 5 active hugepages (1 / 5 = 0.2 < 0.25). + */ + expect_zu_eq(2, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * We still have completely dirty hugepage, but we are below + * opt.dirty_mult. 
+ */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -516,5 +672,8 @@ main(void) { test_stress, test_alloc_dalloc_batch, test_defer_time, - test_purge_no_infinite_loop); + test_purge_no_infinite_loop, + test_strict_no_min_purge_interval, + test_strict_min_purge_interval, + test_purge); } From aaa29003ab90b574c29dc4c0c331085c07f1c1fd Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 6 Aug 2024 08:47:57 -0700 Subject: [PATCH 199/395] Limit maximum number of purged slabs with option Option `experimental_hpa_max_purge_nhp` introduced for backward compatibility reasons: to make it possible to have behaviour similar to buggy `hpa_strict_min_purge_interval` implementation. When `experimental_hpa_max_purge_nhp` is set to -1, there is no limit to number of slabs we'll purge on each iteration. Otherwise, we'll purge no more than `experimental_hpa_max_purge_nhp` hugepages (slabs). This in turn means we might not purge enough dirty pages to satisfy `hpa_dirty_mult` requirement. Combination of `hpa_dirty_mult`, `experimental_hpa_max_purge_nhp` and `hpa_strict_min_purge_interval` options allows us to have steady rate of pages returned back to the system. This provides a strickier latency guarantees as number of `madvise` calls is bounded (and hence number of TLB shootdowns is limited) in exchange to weaker memory usage guarantees. --- include/jemalloc/internal/hpa_opts.h | 9 +++- src/ctl.c | 8 ++- src/hpa.c | 17 ++++++- src/jemalloc.c | 4 ++ src/stats.c | 1 + test/unit/hpa.c | 75 ++++++++++++++++++++++++++-- test/unit/mallctl.c | 1 + 7 files changed, 109 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 93add641..bf3de0e9 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -57,6 +57,11 @@ struct hpa_shard_opts_s { * purging logic fix. 
*/ bool strict_min_purge_interval; + + /* + * Maximum number of hugepages to purge on each purging attempt. + */ + ssize_t experimental_max_purge_nhp; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -79,7 +84,9 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* strict_min_purge_interval */ \ - false \ + false, \ + /* experimental_max_purge_nhp */ \ + -1 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index ebe5c61c..a01f643e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -104,6 +104,7 @@ CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_hpa_strict_min_purge_interval) +CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -460,7 +461,10 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("hpa_strict_min_purge_interval"), CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("hpa_strict_min_purge_interval"), + CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("experimental_hpa_max_purge_nhp"), + CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2197,6 +2201,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, opt_hpa_opts.strict_min_purge_interval, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, + opt_hpa_opts.experimental_max_purge_nhp, ssize_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index d3b9c6c2..0410fefc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -552,7 
+552,22 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * too frequently. */ if (hpa_min_purge_interval_passed(tsdn, shard)) { - while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + size_t max_purges = max_ops; + /* + * Limit number of hugepages (slabs) to purge. + * When experimental_max_purge_nhp option is used, there is no + * guarantee we'll always respect dirty_mult option. Option + * experimental_max_purge_nhp provides a way to configure same + * behaviour as was possible before, with buggy implementation + * of purging algorithm. + */ + ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; + if (max_purge_nhp != -1 && + max_purges > (size_t)max_purge_nhp) { + max_purges = max_purge_nhp; + } + + while (hpa_should_purge(tsdn, shard) && nops < max_purges) { if (!hpa_try_purge(tsdn, shard)) { /* * It is fine if we couldn't purge as sometimes diff --git a/src/jemalloc.c b/src/jemalloc.c index abd7540f..4859cff6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1558,6 +1558,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.strict_min_purge_interval, "hpa_strict_min_purge_interval"); + CONF_HANDLE_SSIZE_T( + opt_hpa_opts.experimental_max_purge_nhp, + "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/stats.c b/src/stats.c index fbfacabf..a5c3f0fe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1565,6 +1565,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_BOOL("hpa_strict_min_purge_interval") + OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 2c11e0a8..4f15876b 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -35,7 +35,9 @@ 
static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* strict_min_purge_interval */ - false + false, + /* experimental_max_purge_nhp */ + -1 }; static hpa_shard_opts_t test_hpa_shard_opts_purge = { @@ -52,7 +54,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* min_purge_interval_ms */ 5 * 1000, /* strict_min_purge_interval */ - false + false, + /* experimental_max_purge_nhp */ + -1 }; static hpa_shard_t * @@ -653,6 +657,70 @@ TEST_BEGIN(test_purge) { } TEST_END +TEST_BEGIN(test_experimental_max_purge_nhp) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.experimental_max_purge_nhp = 1; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 3 hugepages out of 8. */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Expect only one purge call, because opts.experimental_max_purge_nhp + * is set to 1. 
+ */ + expect_zu_eq(1, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* We still above the limit for dirty pages. */ + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* Finally, we are below the limit, no purges are expected. */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -675,5 +743,6 @@ main(void) { test_purge_no_infinite_loop, test_strict_no_min_purge_interval, test_strict_min_purge_interval, - test_purge); + test_purge, + test_experimental_max_purge_nhp); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 84cd3995..ffe5c411 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,6 +292,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); + TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); From c7ccb8d7e99a1c3f1ba3cc3e465bc6dd1b0fbe0b Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 19 Aug 2024 10:54:19 -0700 Subject: [PATCH 200/395] Add `experimental` prefix to `hpa_strict_min_purge_interval` Goal is to make it obvious this option is experimental. 
--- include/jemalloc/internal/hpa_opts.h | 4 ++-- src/ctl.c | 10 +++++----- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- src/stats.c | 2 +- test/unit/hpa.c | 14 +++++++------- test/unit/hpa_background_thread.sh | 2 +- test/unit/mallctl.c | 2 ++ 8 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index bf3de0e9..15765689 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -56,7 +56,7 @@ struct hpa_shard_opts_s { * This is an option to provide backward compatibility for staged rollout of * purging logic fix. */ - bool strict_min_purge_interval; + bool experimental_strict_min_purge_interval; /* * Maximum number of hugepages to purge on each purging attempt. @@ -83,7 +83,7 @@ struct hpa_shard_opts_s { 10 * 1000, \ /* min_purge_interval_ms */ \ 5 * 1000, \ - /* strict_min_purge_interval */ \ + /* experimental_strict_min_purge_interval */ \ false, \ /* experimental_max_purge_nhp */ \ -1 \ diff --git a/src/ctl.c b/src/ctl.c index a01f643e..0f69e1ec 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,7 +103,7 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) -CTL_PROTO(opt_hpa_strict_min_purge_interval) +CTL_PROTO(opt_experimental_hpa_strict_min_purge_interval) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) @@ -461,8 +461,8 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("hpa_strict_min_purge_interval"), - CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("experimental_hpa_strict_min_purge_interval"), + CTL(opt_experimental_hpa_strict_min_purge_interval)}, {NAME("experimental_hpa_max_purge_nhp"), 
CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, @@ -2199,8 +2199,8 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) -CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, - opt_hpa_opts.strict_min_purge_interval, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_strict_min_purge_interval, + opt_hpa_opts.experimental_strict_min_purge_interval, bool) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) diff --git a/src/hpa.c b/src/hpa.c index 0410fefc..3d7a6f60 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -512,7 +512,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (shard->opts.strict_min_purge_interval) { + if (shard->opts.experimental_strict_min_purge_interval) { uint64_t since_last_purge_ms = shard->central->hooks.ms_since( &shard->last_purge); if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 4859cff6..512b3fcc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1555,8 +1555,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); CONF_HANDLE_BOOL( - opt_hpa_opts.strict_min_purge_interval, - "hpa_strict_min_purge_interval"); + opt_hpa_opts.experimental_strict_min_purge_interval, + "experimental_hpa_strict_min_purge_interval"); CONF_HANDLE_SSIZE_T( opt_hpa_opts.experimental_max_purge_nhp, diff --git a/src/stats.c b/src/stats.c index a5c3f0fe..bcd08ce5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,7 +1564,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") 
OPT_WRITE_UINT64("hpa_min_purge_interval_ms") - OPT_WRITE_BOOL("hpa_strict_min_purge_interval") + OPT_WRITE_BOOL("experimental_hpa_strict_min_purge_interval") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 4f15876b..ae8a976c 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,7 +34,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, - /* strict_min_purge_interval */ + /* experimental_strict_min_purge_interval */ false, /* experimental_max_purge_nhp */ -1 @@ -53,7 +53,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* min_purge_interval_ms */ 5 * 1000, - /* strict_min_purge_interval */ + /* experimental_strict_min_purge_interval */ false, /* experimental_max_purge_nhp */ -1 @@ -506,7 +506,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END -TEST_BEGIN(test_strict_no_min_purge_interval) { +TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -547,7 +547,7 @@ TEST_BEGIN(test_strict_no_min_purge_interval) { } TEST_END -TEST_BEGIN(test_strict_min_purge_interval) { +TEST_BEGIN(test_experimental_strict_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -561,7 +561,7 @@ TEST_BEGIN(test_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; - opts.strict_min_purge_interval = true; + opts.experimental_strict_min_purge_interval = true; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -741,8 +741,8 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, - test_strict_no_min_purge_interval, - test_strict_min_purge_interval, + test_no_experimental_strict_min_purge_interval, + test_experimental_strict_min_purge_interval, test_purge, test_experimental_max_purge_nhp); } diff --git 
a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 666da8fc..33b70e19 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_strict_min_purge_interval:true,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ffe5c411..028a27f7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,6 +292,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); + TEST_MALLCTL_OPT(bool, experimental_hpa_strict_min_purge_interval, + always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From 8c54637f8c7a98bbaec6ee38229a904bbf22170c Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 22 Aug 2024 21:58:58 -0700 Subject: [PATCH 201/395] Better trigger race condition in bin_batching unit test --- test/unit/bin_batching.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index 525f59e0..19975341 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -45,9 +45,16 @@ increment_push_failure(size_t push_idx) { atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); } else { assert_zu_lt(push_idx, 4, "Only 4 elems"); - volatile int x = 10000; + volatile size_t x = 10000; while (--x) { /* Spin for a while, to try to provoke a failure. 
*/ + if (x == push_idx) { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); +#endif + } } } } From 5b72ac098abce464add567869d082f2097bd59a2 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 23 Aug 2024 14:25:47 -0700 Subject: [PATCH 202/395] Remove tests for ppc64 on Travic CI. --- .travis.yml | 33 --------------------------------- scripts/gen_travis.py | 3 ++- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/.travis.yml b/.travis.yml index ec1481c4..20605fc0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -271,39 +271,6 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 651006ca..d2fd25e3 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -316,7 +316,8 @@ def main(): # generate_freebsd(AMD64), generate_linux(AMD64), - generate_linux(PPC64LE), + # PPC tests on travis has been down for a while, disable it for now. + # generate_linux(PPC64LE), generate_macos(AMD64), From bd0a5b0f3b6ce17a5f888e8e08ee5de774b29579 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 28 Aug 2024 11:33:07 -0700 Subject: [PATCH 203/395] Fix static analysis warnings. Newly reported warnings included several reserved macro identifier, and false-positive used-uninitialized. --- scripts/run_static_analysis.sh | 3 ++- src/arena.c | 2 +- src/ctl.c | 2 +- src/tcache.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index e2185ec9..1662f7eb 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -53,7 +53,8 @@ CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(npr --ctu --compile-uniqueing strict --output static_analysis_raw_results \ --analyzers clangsa clang-tidy --skip "$skipfile" \ --enable readability-inconsistent-declaration-parameter-name \ - --enable performance-no-int-to-ptr + --enable performance-no-int-to-ptr \ + --disable clang-diagnostic-reserved-macro-identifier # `--enable` is additive, the vast majority of the checks we want are # enabled by default. 
diff --git a/src/arena.c b/src/arena.c index 21010279..bee106f0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -473,7 +473,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, arena_decide_unforced_purge_eagerness(is_background_thread); bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, eagerness); - size_t npages_new; + size_t npages_new JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(0); if (epoch_advanced) { /* Backlog is updated on epoch advance. */ npages_new = decay_epoch_npages_delta(decay); diff --git a/src/ctl.c b/src/ctl.c index 0f69e1ec..ec569d7c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3165,7 +3165,7 @@ arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; - char *name; + char *name JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); diff --git a/src/tcache.c b/src/tcache.c index b90907ad..003469a3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1208,7 +1208,7 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache != NULL && tcache_slow != NULL); bool enabled = tcache_available(tsd); - arena_t *assigned_arena; + arena_t *assigned_arena JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); if (enabled) { assigned_arena = tcache_slow->arena; /* Carry over the bin settings during the reboot. 
*/ From e29ac619870b99a2cdde991c9d6d4c08d11a8ec2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 28 Aug 2024 16:19:16 -0700 Subject: [PATCH 204/395] Limit Cirrus CI to freebsd 15 and 14 --- .cirrus.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 63a96d2a..13714014 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -34,12 +34,6 @@ task: - name: 14-STABLE freebsd_instance: image_family: freebsd-14-0-snap - - name: 14.0-RELEASE - freebsd_instance: - image_family: freebsd-14-0 - - name: 13-STABLE - freebsd_instance: - image_family: freebsd-13-2-snap install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 9e123a833cc6f56381c46a1656a323f893fa2528 Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Thu, 21 Dec 2023 20:33:41 -0800 Subject: [PATCH 205/395] Leverage new Windows API TlsGetValue2 for performance --- include/jemalloc/internal/quantum.h | 2 +- include/jemalloc/internal/tsd_win.h | 30 ++++++++++++++++++++++++++--- src/tsd.c | 4 ++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index a97f54ca..b4beb309 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -24,7 +24,7 @@ # ifdef __arm__ # define LG_QUANTUM 3 # endif -# ifdef __aarch64__ +# if defined(__aarch64__) || defined(_M_ARM64) # define LG_QUANTUM 4 # endif # ifdef __hppa__ diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 8ec7eda7..4b40a8ab 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -15,6 +15,16 @@ typedef struct { extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; extern bool tsd_booted; +#if defined(_M_ARM64EC) +#define JEMALLOC_WIN32_TLSGETVALUE2 0 +#else +#define JEMALLOC_WIN32_TLSGETVALUE2 1 +#endif +#if 
JEMALLOC_WIN32_TLSGETVALUE2 +typedef LPVOID (WINAPI *TGV2)(DWORD dwTlsIndex); +extern TGV2 tls_get_value2; +extern HMODULE tgv2_mod; +#endif /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool @@ -49,9 +59,17 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { - DWORD error = GetLastError(); - tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); - SetLastError(error); + tsd_wrapper_t *wrapper; +#if JEMALLOC_WIN32_TLSGETVALUE2 + if (tls_get_value2 != NULL) { + wrapper = (tsd_wrapper_t *) tls_get_value2(tsd_tsd); + } else +#endif + { + DWORD error = GetLastError(); + wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + } if (init && unlikely(wrapper == NULL)) { wrapper = (tsd_wrapper_t *) @@ -78,6 +96,12 @@ tsd_boot0(void) { } _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); +#if JEMALLOC_WIN32_TLSGETVALUE2 + tgv2_mod = LoadLibraryA("api-ms-win-core-processthreads-l1-1-8.dll"); + if (tgv2_mod != NULL) { + tls_get_value2 = (TGV2)GetProcAddress(tgv2_mod, "TlsGetValue2"); + } +#endif tsd_booted = true; return false; } diff --git a/src/tsd.c b/src/tsd.c index a4db8e36..c9ae2d64 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -25,6 +25,10 @@ bool tsd_booted = false; DWORD tsd_tsd; tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; bool tsd_booted = false; +#if JEMALLOC_WIN32_TLSGETVALUE2 +TGV2 tls_get_value2 = NULL; +HMODULE tgv2_mod = NULL; +#endif #else /* From f68effe4ac0d1ee5cf26fc9c7fc50c88d16bf6ba Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Mon, 12 Aug 2024 16:15:46 -0700 Subject: [PATCH 206/395] Add a runtime option opt_experimental_tcache_gc to guard the new design --- include/jemalloc/internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 4 ++++ src/jemalloc.c | 3 +++ src/stats.c | 1 + 4 files changed, 9 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h 
b/include/jemalloc/internal/jemalloc_internal_externs.h index 41c0f366..2c6b58f7 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -29,6 +29,7 @@ extern void (*JET_MUTABLE invalid_conf_abort)(void); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; +extern bool opt_experimental_tcache_gc; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; diff --git a/src/ctl.c b/src/ctl.c index ec569d7c..8608f124 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -131,6 +131,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) +CTL_PROTO(opt_experimental_tcache_gc) CTL_PROTO(opt_max_batched_size) CTL_PROTO(opt_remote_free_max) CTL_PROTO(opt_remote_free_max_batch) @@ -494,6 +495,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("experimental_tcache_gc"), + CTL(opt_experimental_tcache_gc)}, {NAME("max_batched_size"), CTL(opt_max_batched_size)}, {NAME("remote_free_max"), CTL(opt_remote_free_max)}, {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, @@ -2243,6 +2246,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) +CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 512b3fcc..dc471563 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -151,6 +151,7 @@ void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = 
false; bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; +bool opt_experimental_tcache_gc = false; bool opt_zero = false; unsigned opt_narenas = 0; static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -1414,6 +1415,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "experimental_infallible_new") } + CONF_HANDLE_BOOL(opt_experimental_tcache_gc, + "experimental_tcache_gc") CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, diff --git a/src/stats.c b/src/stats.c index bcd08ce5..f45b7447 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1600,6 +1600,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") + OPT_WRITE_BOOL("experimental_tcache_gc") OPT_WRITE_SIZE_T("max_batched_size") OPT_WRITE_SIZE_T("remote_free_max") OPT_WRITE_SIZE_T("remote_free_max_batch") From 14d5dc136a40ddf2464f2178f950b562f38f0d25 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 10:24:09 -0700 Subject: [PATCH 207/395] Allow a range for the nfill passed to arena_cache_bin_fill_small --- include/jemalloc/internal/arena_externs.h | 3 ++- src/arena.c | 31 +++++++++++++++-------- src/tcache.c | 4 ++- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3d0329fc..e915c97a 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,7 +63,8 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill); + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const 
cache_bin_sz_t nfill_max); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/src/arena.c b/src/arena.c index bee106f0..884d1bf9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1047,14 +1047,16 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill) { + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max) { assert(cache_bin_ncached_get_local(cache_bin) == 0); - assert(nfill != 0); + assert(nfill_min > 0 && nfill_min <= nfill_max); + assert(nfill_max <= cache_bin_ncached_max_get(cache_bin)); const bin_info_t *bin_info = &bin_infos[binind]; - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill); + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. After both are exhausted, new slabs will be allocated through @@ -1101,13 +1103,19 @@ label_refill: malloc_mutex_lock(tsdn, &bin->lock); arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); - while (filled < nfill) { + while (filled < nfill_min) { /* Try batch-fill from slabcur first. */ edata_t *slabcur = bin->slabcur; if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { - unsigned tofill = nfill - filled; - unsigned nfree = edata_nfree_get(slabcur); - unsigned cnt = tofill < nfree ? tofill : nfree; + /* + * Use up the free slots if the total filled <= nfill_max. + * Otherwise, fallback to nfill_min for a more conservative + * memory usage. 
+ */ + unsigned cnt = edata_nfree_get(slabcur); + if (cnt + filled > nfill_max) { + cnt = nfill_min - filled; + } arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, &ptrs.ptr[filled]); @@ -1144,7 +1152,7 @@ label_refill: assert(fresh_slab == NULL); assert(!alloc_and_retry); break; - } /* while (filled < nfill) loop. */ + } /* while (filled < nfill_min) loop. */ if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; @@ -1162,7 +1170,7 @@ label_refill: if (alloc_and_retry) { assert(fresh_slab == NULL); - assert(filled < nfill); + assert(filled < nfill_min); assert(made_progress); fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, @@ -1173,7 +1181,8 @@ label_refill: made_progress = false; goto label_refill; } - assert(filled == nfill || (fresh_slab == NULL && !made_progress)); + assert((filled >= nfill_min && filled <= nfill_max) || + (fresh_slab == NULL && !made_progress)); /* Release if allocated but not used. */ if (fresh_slab != NULL) { diff --git a/src/tcache.c b/src/tcache.c index 003469a3..c300ed7d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -254,7 +254,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, if (nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, + /* nfill_min */ opt_experimental_tcache_gc ? 
+ ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); From e2c9f3a9ce684090898b58a5fdb244cff48ef9bb Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 13:20:11 -0700 Subject: [PATCH 208/395] Take locality into consideration when doing GC flush --- include/jemalloc/internal/tcache_types.h | 1 + src/tcache.c | 216 ++++++++++++++++++++++- 2 files changed, 213 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 578a199e..27516387 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -23,5 +23,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index c300ed7d..35f18077 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -134,10 +134,162 @@ tcache_gc_item_delay_compute(szind_t szind) { return (uint8_t)item_delay; } +static inline void * +tcache_gc_small_heuristic_addr_get(tsd_t *tsd, tcache_slow_t *tcache_slow, + szind_t szind) { + assert(szind < SC_NBINS); + tsdn_t *tsdn = tsd_tsdn(tsd); + bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + assert(bin != NULL); + + malloc_mutex_lock(tsdn, &bin->lock); + edata_t *slab = (bin->slabcur == NULL) ? + edata_heap_first(&bin->slabs_nonfull) : bin->slabcur; + assert(slab != NULL || edata_heap_empty(&bin->slabs_nonfull)); + void *ret = (slab != NULL) ? 
edata_addr_get(slab) : NULL; + assert(ret != NULL || slab == NULL); + malloc_mutex_unlock(tsdn, &bin->lock); + + return ret; +} + +static inline bool +tcache_gc_is_addr_remote(void *addr, uintptr_t min, uintptr_t max) { + assert(addr != NULL); + return ((uintptr_t)addr < min || (uintptr_t)addr >= max); +} + +static inline cache_bin_sz_t +tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, + uintptr_t *addr_min, uintptr_t *addr_max, szind_t szind, size_t nflush) { + assert(addr != NULL && addr_min != NULL && addr_max != NULL); + /* The slab address range that the provided addr belongs to. */ + uintptr_t slab_min = (uintptr_t)addr; + uintptr_t slab_max = slab_min + bin_infos[szind].slab_size; + /* + * When growing retained virtual memory, it's increased exponentially, + * starting from 2M, so that the total number of disjoint virtual + * memory ranges retained by each shard is limited. + */ + uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) ? + ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) : 0; + uintptr_t neighbor_max = ((uintptr_t)addr < (UINTPTR_MAX - + TCACHE_GC_NEIGHBOR_LIMIT)) ? ((uintptr_t)addr + + TCACHE_GC_NEIGHBOR_LIMIT) : UINTPTR_MAX; + + /* Scan the entire bin to count the number of remote pointers. */ + void **head = cache_bin->stack_head; + cache_bin_sz_t n_remote_slab = 0, n_remote_neighbor = 0; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + for (void **cur = head; cur < head + ncached; cur++) { + n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, + slab_min, slab_max); + n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, + neighbor_min, neighbor_max); + } + /* + * Note: since slab size is dynamic and can be larger than 2M, i.e. + * TCACHE_GC_NEIGHBOR_LIMIT, there is no guarantee as to which of + * n_remote_slab and n_remote_neighbor is greater. 
+ */ + assert(n_remote_slab <= ncached && n_remote_neighbor <= ncached); + /* + * We first consider keeping ptrs from the neighboring addr range, + * since in most cases the range is greater than the slab range. + * So if the number of non-neighbor ptrs is more than the intended + * flush amount, we use it as the anchor for flushing. + */ + if (n_remote_neighbor >= nflush) { + *addr_min = neighbor_min; + *addr_max = neighbor_max; + return n_remote_neighbor; + } + /* + * We then consider only keeping ptrs from the local slab, and in most + * cases this is stricter, assuming that slab < 2M is the common case. + */ + *addr_min = slab_min; + *addr_max = slab_max; + return n_remote_slab; +} + +/* Shuffle the ptrs in the bin to put the remote pointers at the bottom. */ +static inline void +tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, + uintptr_t addr_min, uintptr_t addr_max) { + void **swap = NULL; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t ntop = ncached - nremote, cnt = 0; + assert(ntop > 0 && ntop < ncached); + /* + * Scan the [head, head + ntop) part of the cache bin, during which + * bubbling the non-remote ptrs to the top of the bin. + * After this, the [head, head + cnt) part of the bin contains only + * non-remote ptrs, and they're in the same relative order as before. + * While the [head + cnt, head + ntop) part contains only remote ptrs. + */ + void **head = cache_bin->stack_head; + for (void **cur = head; cur < head + ntop; cur++) { + if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { + /* Tracks the number of non-remote ptrs seen so far. */ + cnt++; + /* + * There is remote ptr before the current non-remote ptr, + * swap the current non-remote ptr with the remote ptr, + * and increment the swap pointer so that it's still + * pointing to the top remote ptr in the bin. 
+ */ + if (swap != NULL) { + assert(swap < cur); + assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + void *tmp = *cur; + *cur = *swap; + *swap = tmp; + swap++; + assert(swap <= cur); + assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + } + continue; + } else if (swap == NULL) { + /* Swap always points to the top remote ptr in the bin. */ + swap = cur; + } + } + /* + * Scan the [head + ntop, head + ncached) part of the cache bin, + * after which it should only contain remote ptrs. + */ + for (void **cur = head + ntop; cur < head + ncached; cur++) { + /* Early break if all non-remote ptrs have been moved. */ + if (cnt == ntop) { + break; + } + if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { + assert(tcache_gc_is_addr_remote(*(head + cnt), addr_min, + addr_max)); + void *tmp = *cur; + *cur = *(head + cnt); + *(head + cnt) = tmp; + cnt++; + } + } + assert(cnt == ntop); + /* Sanity check to make sure the shuffle is done correctly. */ + for (void **cur = head; cur < head + ncached; cur++) { + assert(*cur != NULL); + assert(((cur < head + ntop) && !tcache_gc_is_addr_remote( + *cur, addr_min, addr_max)) || ((cur >= head + ntop) && + tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); + } +} + static void tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { - /* Aim to flush 3/4 of items below low-water. */ + /* + * Aim to flush 3/4 of items below low-water, with remote pointers being + * prioritized for flushing. + */ assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; @@ -158,8 +310,6 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->bin_flush_delay_items[szind] = tcache_gc_item_delay_compute(szind); - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, - (unsigned)(ncached - nflush)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that
@@ -169,12 +319,70 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 tcache_slow->lg_fill_div[szind]) > 1) {
 tcache_slow->lg_fill_div[szind]++;
 }
+
+ /*
+ * When the new tcache gc is not enabled, or simply the entire bin needs
+ * to be flushed, flush the bottom nflush items directly.
+ */
+ if (!opt_experimental_tcache_gc || nflush == ncached) {
+ goto label_flush;
+ }
+
+ /* Query arena binshard to get heuristic locality info. */
+ void *addr = tcache_gc_small_heuristic_addr_get(tsd, tcache_slow, szind);
+ if (addr == NULL) {
+ goto label_flush;
+ }
+
+ /*
+ * Use the queried addr above to get the number of remote ptrs in the
+ * bin, and the min/max of the local addr range.
+ */
+ uintptr_t addr_min, addr_max;
+ cache_bin_sz_t nremote = tcache_gc_small_nremote_get(cache_bin, addr,
+ &addr_min, &addr_max, szind, nflush);
+
+ /*
+ * Update the nflush to the larger value between the intended flush count
+ * and the number of remote ptrs.
+ */
+ if (nremote > nflush) {
+ nflush = nremote;
+ }
+ /*
+ * When entering the locality check, nflush should be less than ncached,
+ * otherwise the entire bin should be flushed regardless. The only case
+ * when nflush gets updated to ncached after locality check is, when all
+ * the items in the bin are remote, in which case the entire bin should
+ * also be flushed.
+ */
+ assert(nflush < ncached || nremote == ncached);
+ if (nremote == 0 || nremote == ncached) {
+ goto label_flush;
+ }
+
+ /*
+ * Move the remote pointers to the bottom of the bin for flushing.
+ * As long as moved to the bottom, the order of these nremote ptrs
+ * does not matter, since they are going to be flushed anyway.
+ * The rest of the ptrs are moved to the top of the bin, and their
+ * relative order is maintained. 
+ */ + tcache_gc_small_bin_shuffle(cache_bin, nremote, addr_min, addr_max); + +label_flush: + assert(nflush > 0 && nflush <= ncached); + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, + (unsigned)(ncached - nflush)); } static void tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { - /* Like the small GC; flush 3/4 of untouched items. */ + /* + * Like the small GC, flush 3/4 of untouched items. However, simply flush + * the bottom nflush items, without any locality check. + */ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); From 0c88be9e0a09fc868ac05ace96466bdc6f502ab8 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 14:01:32 -0700 Subject: [PATCH 209/395] Regulate GC frequency by requiring a time interval between two consecutive GCs --- include/jemalloc/internal/tcache_structs.h | 4 + include/jemalloc/internal/tcache_types.h | 3 + src/tcache.c | 191 ++++++++++++++------- 3 files changed, 138 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index d94099b0..63e5db5d 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -33,8 +33,12 @@ struct tcache_slow_s { arena_t *arena; /* The number of bins activated in the tcache. */ unsigned tcache_nbins; + /* Last time GC has been performed. */ + nstime_t last_gc_time; /* Next bin to GC. */ szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[SC_NBINS]; /* For small bins, whether has been refilled since last GC. 
*/ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 27516387..eebad79f 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -24,5 +24,8 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) #define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ +#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) +#define TCACHE_GC_LARGE_NBINS_MAX 1 #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index 35f18077..3d38700d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -283,7 +283,7 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, } } -static void +static bool tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* @@ -296,35 +296,49 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); + if (low_water > 0) { + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + if ((cache_bin_ncached_max_get(cache_bin) >> + tcache_slow->lg_fill_div[szind]) > 1) { + tcache_slow->lg_fill_div[szind]++; + } + } else if (tcache_slow->bin_refilled[szind]) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 0. 
+ */
+ if (tcache_slow->lg_fill_div[szind] > 1) {
+ tcache_slow->lg_fill_div[szind]--;
+ }
+ tcache_slow->bin_refilled[szind] = false;
+ }
 assert(!tcache_slow->bin_refilled[szind]);
- size_t nflush = low_water - (low_water >> 2);
- if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
- /* Workaround for a conversion warning. */
- uint8_t nflush_uint8 = (uint8_t)nflush;
- assert(sizeof(tcache_slow->bin_flush_delay_items[0]) ==
- sizeof(nflush_uint8));
- tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8;
- return;
+ cache_bin_sz_t nflush = low_water - (low_water >> 2);
+ /*
+ * When the new tcache gc is not enabled, keep the flush delay logic,
+ * and directly flush the bottom nflush items if needed.
+ */
+ if (!opt_experimental_tcache_gc) {
+ if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
+ /* Workaround for a conversion warning. */
+ uint8_t nflush_uint8 = (uint8_t)nflush;
+ assert(sizeof(tcache_slow->bin_flush_delay_items[0]) ==
+ sizeof(nflush_uint8));
+ tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8;
+ return false;
+ }
+
+ tcache_slow->bin_flush_delay_items[szind]
+ = tcache_gc_item_delay_compute(szind);
+ goto label_flush;
 }
- tcache_slow->bin_flush_delay_items[szind]
- = tcache_gc_item_delay_compute(szind);
-
- /*
- * Reduce fill count by 2X. Limit lg_fill_div such that
- * the fill count is always at least 1.
- */
- if ((cache_bin_ncached_max_get(cache_bin) >>
- tcache_slow->lg_fill_div[szind]) > 1) {
- tcache_slow->lg_fill_div[szind]++;
- }
-
- /*
- * When the new tcache gc is not enabled, or simply the entire bin needs
- * to be flushed, flush the bottom nflush items directly.
- */
- if (!opt_experimental_tcache_gc || nflush == ncached) {
+ /* Directly go to the flush path when the entire bin needs to be flushed. 
 */
+ if (nflush == ncached) {
+ goto label_flush;
+ }
@@ -371,12 +385,17 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 tcache_gc_small_bin_shuffle(cache_bin, nremote, addr_min, addr_max);
 label_flush:
- assert(nflush > 0 && nflush <= ncached);
+ if (nflush == 0) {
+ assert(low_water == 0);
+ return false;
+ }
+ assert(nflush <= ncached);
 tcache_bin_flush_small(tsd, tcache, cache_bin, szind,
 (unsigned)(ncached - nflush));
+ return true;
 }
-static void
+static bool
 tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 szind_t szind) {
 /*
@@ -386,10 +405,32 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 assert(szind >= SC_NBINS);
 cache_bin_t *cache_bin = &tcache->bins[szind];
 assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow));
- cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
 cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin);
- tcache_bin_flush_large(tsd, tcache, cache_bin, szind,
- (unsigned)(ncached - low_water + (low_water >> 2)));
+ if (low_water == 0) {
+ return false;
+ }
+ unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) -
+ low_water + (low_water >> 2));
+ tcache_bin_flush_large(tsd, tcache, cache_bin, szind, nrem);
+ return true;
+}
+
+/* Try to gc one bin by szind, return true if there is item flushed. */
+static bool
+tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
+ szind_t szind) {
+ assert(tcache != NULL);
+ cache_bin_t *cache_bin = &tcache->bins[szind];
+ if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) {
+ return false;
+ }
+
+ bool is_small = (szind < SC_NBINS);
+ tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small);
+ bool ret = is_small ? 
tcache_gc_small(tsd, tcache_slow, tcache, szind) : + tcache_gc_large(tsd, tcache_slow, tcache, szind); + cache_bin_low_water_set(cache_bin); + return ret; } static void @@ -400,40 +441,67 @@ tcache_event(tsd_t *tsd) { } tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - szind_t szind = tcache_slow->next_gc_bin; - bool is_small = (szind < SC_NBINS); - cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(tcache_slow != NULL); - if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { - goto label_done; - } - - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); - if (low_water > 0) { - if (is_small) { - tcache_gc_small(tsd, tcache_slow, tcache, szind); - } else { - tcache_gc_large(tsd, tcache_slow, tcache, szind); + /* When the new tcache gc is not enabled, GC one bin at a time. */ + if (!opt_experimental_tcache_gc) { + szind_t szind = tcache_slow->next_gc_bin; + tcache_try_gc_bin(tsd, tcache_slow, tcache, szind); + tcache_slow->next_gc_bin++; + if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { + tcache_slow->next_gc_bin = 0; } - } else if (is_small && tcache_slow->bin_refilled[szind]) { - assert(low_water == 0); - /* - * Increase fill count by 2X for small bins. Make sure - * lg_fill_div stays greater than 0. - */ - if (tcache_slow->lg_fill_div[szind] > 1) { - tcache_slow->lg_fill_div[szind]--; - } - tcache_slow->bin_refilled[szind] = false; + return; } - cache_bin_low_water_set(cache_bin); -label_done: - tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { - tcache_slow->next_gc_bin = 0; + nstime_t now; + nstime_copy(&now, &tcache_slow->last_gc_time); + nstime_update(&now); + assert(nstime_compare(&now, &tcache_slow->last_gc_time) >= 0); + + if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) < + TCACHE_GC_INTERVAL_NS) { + // time interval is too short, skip this event. 
+ return; } + /* Update last_gc_time to now. */ + nstime_copy(&tcache_slow->last_gc_time, &now); + + unsigned gc_small_nbins = 0, gc_large_nbins = 0; + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); + unsigned small_nbins = tcache_nbins > SC_NBINS ? SC_NBINS : tcache_nbins; + szind_t szind_small = tcache_slow->next_gc_bin_small; + szind_t szind_large = tcache_slow->next_gc_bin_large; + + /* Flush at most TCACHE_GC_SMALL_NBINS_MAX small bins at a time. */ + for (unsigned i = 0; i < small_nbins && gc_small_nbins < + TCACHE_GC_SMALL_NBINS_MAX; i++) { + assert(szind_small < SC_NBINS); + if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_small)) { + gc_small_nbins++; + } + if (++szind_small == small_nbins) { + szind_small = 0; + } + } + tcache_slow->next_gc_bin_small = szind_small; + + if (tcache_nbins <= SC_NBINS) { + return; + } + + /* Flush at most TCACHE_GC_LARGE_NBINS_MAX large bins at a time. */ + for (unsigned i = SC_NBINS; i < tcache_nbins && gc_large_nbins < + TCACHE_GC_LARGE_NBINS_MAX; i++) { + assert(szind_large >= SC_NBINS && szind_large < tcache_nbins); + if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_large)) { + gc_large_nbins++; + } + if (++szind_large == tcache_nbins) { + szind_large = SC_NBINS; + } + } + tcache_slow->next_gc_bin_large = szind_large; } void @@ -1146,7 +1214,10 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->tcache = tcache; memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t))); + nstime_init_zero(&tcache_slow->last_gc_time); tcache_slow->next_gc_bin = 0; + tcache_slow->next_gc_bin_small = 0; + tcache_slow->next_gc_bin_large = SC_NBINS; tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; From 7c996861656f67dc74ab66f1bc6e758ed96c69b3 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 22 Aug 2024 14:50:08 -0700 Subject: [PATCH 210/395] Better handle burst allocation on tcache_alloc_small_hard --- include/jemalloc/internal/cache_bin.h | 10 ++ 
include/jemalloc/internal/tcache_structs.h | 4 +- src/tcache.c | 105 ++++++++++++++++++--- 3 files changed, 104 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a7a5e40e..cb137af9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -600,6 +600,16 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, */ } +/* + * For small bins, used to calculate how many items to fill at a time. + * The final nfill is calculated by (ncached_max >> (base - offset)). + */ +typedef struct cache_bin_fill_ctl_s cache_bin_fill_ctl_t; +struct cache_bin_fill_ctl_s { + uint8_t base; + uint8_t offset; +}; + /* * Limit how many items can be flushed in a batch (Which is the upper bound * for the nflush parameter in tcache_bin_flush_impl()). diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 63e5db5d..e9a68152 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -39,8 +39,8 @@ struct tcache_slow_s { szind_t next_gc_bin; szind_t next_gc_bin_small; szind_t next_gc_bin_large; - /* For small bins, fill (ncached_max >> lg_fill_div). */ - uint8_t lg_fill_div[SC_NBINS]; + /* For small bins, help determine how many items to fill at a time. */ + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; /* For small bins, whether has been refilled since last GC. 
*/ bool bin_refilled[SC_NBINS]; /* diff --git a/src/tcache.c b/src/tcache.c index 3d38700d..15da14da 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -121,6 +121,85 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } +static inline void +tcache_bin_fill_ctl_init(tcache_slow_t *tcache_slow, szind_t szind) { + assert(szind < SC_NBINS); + cache_bin_fill_ctl_t *ctl = + &tcache_slow->bin_fill_ctl_do_not_access_directly[szind]; + ctl->base = 1; + ctl->offset = 0; +} + +static inline cache_bin_fill_ctl_t * +tcache_bin_fill_ctl_get(tcache_slow_t *tcache_slow, szind_t szind) { + assert(szind < SC_NBINS); + cache_bin_fill_ctl_t *ctl = + &tcache_slow->bin_fill_ctl_do_not_access_directly[szind]; + assert(ctl->base > ctl->offset); + return ctl; +} + +/* + * The number of items to be filled at a time for a given small bin is + * calculated by (ncached_max >> lg_fill_div). + * The actual ctl struct consists of two fields, i.e. base and offset, + * and the difference between the two(base - offset) is the final lg_fill_div. + * The base is adjusted during GC based on the traffic within a period of time, + * while the offset is updated in real time to handle the immediate traffic. + */ +static inline uint8_t +tcache_nfill_small_lg_div_get(tcache_slow_t *tcache_slow, szind_t szind) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + return (ctl->base - (opt_experimental_tcache_gc ? ctl->offset : 0)); +} + +/* + * When we want to fill more items to respond to burst load, + * offset is increased so that (base - offset) is decreased, + * which in return increases the number of items to be filled. 
+ */ +static inline void +tcache_nfill_small_burst_prepare(tcache_slow_t *tcache_slow, szind_t szind) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + if (ctl->offset + 1 < ctl->base) { + ctl->offset++; + } +} + +static inline void +tcache_nfill_small_burst_reset(tcache_slow_t *tcache_slow, szind_t szind) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + ctl->offset = 0; +} + +/* + * limit == 0: indicating that the fill count should be increased, + * i.e. lg_div(base) should be decreased. + * + * limit != 0: limit is set to ncached_max, indicating that the fill + * count should be decreased, i.e. lg_div(base) should be increased. + */ +static inline void +tcache_nfill_small_gc_update(tcache_slow_t *tcache_slow, szind_t szind, + cache_bin_sz_t limit) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + if (!limit && ctl->base > 1) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 1. + */ + ctl->base--; + } else if (limit && (limit >> ctl->base) > 1) { + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + ctl->base++; + } + /* Reset the offset for the next GC period. */ + ctl->offset = 0; +} + static uint8_t tcache_gc_item_delay_compute(szind_t szind) { assert(szind < SC_NBINS); @@ -298,21 +377,19 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); if (low_water > 0) { /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. + * There is unused items within the GC period => reduce fill count. + * limit field != 0 is borrowed to indicate that the fill count + * should be reduced. 
 */
- if ((cache_bin_ncached_max_get(cache_bin) >>
- tcache_slow->lg_fill_div[szind]) > 1) {
- tcache_slow->lg_fill_div[szind]++;
- }
+ tcache_nfill_small_gc_update(tcache_slow, szind,
+ /* limit */ cache_bin_ncached_max_get(cache_bin));
 } else if (tcache_slow->bin_refilled[szind]) {
 /*
- * Increase fill count by 2X for small bins. Make sure
- * lg_fill_div stays greater than 0.
+ * There have been refills within the GC period => increase fill count.
+ * limit field set to 0 is borrowed to indicate that the fill count
+ * should be increased.
 */
- if (tcache_slow->lg_fill_div[szind] > 1) {
- tcache_slow->lg_fill_div[szind]--;
- }
+ tcache_nfill_small_gc_update(tcache_slow, szind, /* limit */ 0);
 tcache_slow->bin_refilled[szind] = false;
 }
 assert(!tcache_slow->bin_refilled[szind]);
@@ -526,7 +603,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
 assert(tcache_slow->arena != NULL);
 assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow));
 cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin)
- >> tcache_slow->lg_fill_div[binind];
+ >> tcache_nfill_small_lg_div_get(tcache_slow, binind);
 if (nfill == 0) {
 nfill = 1;
 }
@@ -534,6 +611,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
 /* nfill_min */ opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; + tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); return ret; @@ -1059,6 +1137,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { + tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); } @@ -1233,7 +1312,7 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, &cur_offset); for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { - tcache_slow->lg_fill_div[i] = 1; + tcache_bin_fill_ctl_init(tcache_slow, i); tcache_slow->bin_refilled[i] = false; tcache_slow->bin_flush_delay_items[i] = tcache_gc_item_delay_compute(i); From baa5a90cc6f77e86c2aa58257f3d6c67a1b863dc Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 28 Aug 2024 15:17:11 -0700 Subject: [PATCH 211/395] fix nstime_update_mock in arena_decay unit test --- test/unit/arena_decay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index e991f4dd..10d1a6b1 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -18,7 +18,7 @@ nstime_monotonic_mock(void) { static void nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (monotonic_mock) { + if (monotonic_mock && nstime_compare(&time_mock, time) > 0) { nstime_copy(time, &time_mock); } } From cd05b19f10fce353105dcc7290a8374a5c4f4a67 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2024 14:43:17 -0700 Subject: [PATCH 212/395] Fix the VM over-reservation on aarch64 w/ larger pages. HUGEPAGE could be larger on some platforms (e.g. 512M on aarch64 w/ 64K pages), in which case it would cause grow_retained / exp_grow to over-reserve VMs. 
Similarly, make sure the base alloc has a const 2M alignment.
---
 include/jemalloc/internal/base.h | 7 ++++++-
 src/base.c | 31 +++++++++++++++++++++++--------
 src/exp_grow.c | 8 +++++++-
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h
index 86b0cf4a..c8004b25 100644
--- a/include/jemalloc/internal/base.h
+++ b/include/jemalloc/internal/base.h
@@ -6,6 +6,12 @@
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/mutex.h"
+/*
+ * Alignment when THP is not enabled. Set to constant 2M in case the HUGEPAGE
+ * value is unexpectedly high (which would cause VM over-reservation).
+ */
+#define BASE_BLOCK_MIN_ALIGN ((size_t)2 << 20)
+
 enum metadata_thp_mode_e {
 metadata_thp_disabled = 0,
 /*
@@ -26,7 +32,6 @@ typedef enum metadata_thp_mode_e metadata_thp_mode_t;
 extern metadata_thp_mode_t opt_metadata_thp;
 extern const char *const metadata_thp_mode_names[];
-
 /* Embedded at the beginning of every block of base-managed virtual memory. */
 typedef struct base_block_s base_block_t;
 struct base_block_s {
diff --git a/src/base.c b/src/base.c
index 1d5e8fcd..ac8598eb 100644
--- a/src/base.c
+++ b/src/base.c
@@ -42,9 +42,17 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
 bool zero = true;
 bool commit = true;
- /* Use huge page sizes and alignment regardless of opt_metadata_thp. */
- assert(size == HUGEPAGE_CEILING(size));
- size_t alignment = HUGEPAGE;
+ /*
+ * Use huge page sizes and alignment when opt_metadata_thp is enabled
+ * or auto. 
+ */ + size_t alignment; + if (opt_metadata_thp == metadata_thp_disabled) { + alignment = BASE_BLOCK_MIN_ALIGN; + } else { + assert(size == HUGEPAGE_CEILING(size)); + alignment = HUGEPAGE; + } if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); if (have_madvise_huge && addr) { @@ -277,6 +285,13 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, return ret; } +static size_t +base_block_size_ceil(size_t block_size) { + return opt_metadata_thp == metadata_thp_disabled ? + ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) : + HUGEPAGE_CEILING(block_size); +} + /* * Allocate a block of virtual memory that is large enough to start with a * base_block_t header, followed by an object of specified size and alignment. @@ -295,14 +310,14 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * Create increasingly larger blocks in order to limit the total number * of disjoint virtual memory ranges. Choose the next size in the page * size class series (skipping size classes that are not a multiple of - * HUGEPAGE), or a size large enough to satisfy the requested size and - * alignment, whichever is larger. + * HUGEPAGE when using metadata_thp), or a size large enough to satisfy + * the requested size and alignment, whichever is larger. */ - size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size - + usize)); + size_t min_block_size = base_block_size_ceil(sz_psz2u(header_size + + gap_size + usize)); pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? *pind_last + 1 : *pind_last; - size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); + size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size : next_block_size; base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, diff --git a/src/exp_grow.c b/src/exp_grow.c index 386471f4..955823a1 100644 --- a/src/exp_grow.c +++ b/src/exp_grow.c @@ -3,6 +3,12 @@ void exp_grow_init(exp_grow_t *exp_grow) { - exp_grow->next = sz_psz2ind(HUGEPAGE); + /* + * Enforce a minimal of 2M grow, which is convenient for the huge page + * use cases. Avoid using HUGEPAGE as the value though, because on some + * platforms it can be very large (e.g. 512M on aarch64 w/ 64K pages). + */ + const size_t min_grow = (size_t)2 << 20; + exp_grow->next = sz_psz2ind(min_grow); exp_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); } From 3383b98f1b9a2e60ec0bda2fcf463ba271926596 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2024 16:04:56 -0700 Subject: [PATCH 213/395] Check if the huge page size is expected when enabling HPA. --- include/jemalloc/internal/pages.h | 8 ++++++++ src/jemalloc.c | 28 +++++++++++++++++++++------- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index b4e9678e..6c295b43 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -27,6 +27,14 @@ extern size_t os_page; #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) #define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* + * Used to validate that the hugepage size is not unexpectedly high. The huge + * page features (HPA, metadata_thp) are primarily designed with a 2M THP size + * in mind. Much larger sizes are not tested and likely to cause issues such as + * bad fragmentation or simply broken. 
+ */ +#define HUGEPAGE_MAX_EXPECTED_SIZE ((size_t)(16U << 20)) + #if LG_HUGEPAGE != 0 # define HUGEPAGE_PAGES (HUGEPAGE / PAGE) #else diff --git a/src/jemalloc.c b/src/jemalloc.c index dc471563..63f6b302 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1041,18 +1041,14 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { return ret; } -static void -validate_hpa_settings(void) { - if (!hpa_supported() || !opt_hpa || opt_hpa_opts.dirty_mult == (fxp_t)-1) { - return; - } +static bool +validate_hpa_ratios(void) { size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + opt_hpa_opts.hugification_threshold; if (hpa_threshold > HUGEPAGE) { - return; + return false; } - had_conf_error = true; char hpa_dirty_mult[FXP_BUF_SIZE]; char hugification_threshold[FXP_BUF_SIZE]; char normalization_message[256] = {0}; @@ -1079,6 +1075,24 @@ validate_hpa_settings(void) { "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " "These values should sum to > 1.0.\n%s", hugification_threshold, hpa_dirty_mult, normalization_message); + + return true; +} + +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa) { + return; + } + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + had_conf_error = true; + malloc_printf( + ": huge page size (%zu) greater than expected." + "May not be supported or behave as expected.", HUGEPAGE); + } + if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { + had_conf_error = true; + } } static void From c1a3ca3755f2adae078b14925e874a6ff743aba1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 May 2024 12:04:09 -0700 Subject: [PATCH 214/395] Adjust the value width in stats output. Some of the values are accumulative and can reach high after running for long periods. 
--- src/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index f45b7447..ef025eb3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -324,12 +324,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, allocated, NULL, right, 13, size) - COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, allocated, NULL, right, 14, size) + COL_HDR(row, nmalloc, NULL, right, 14, uint64) COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc, NULL, right, 14, uint64) COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests, NULL, right, 15, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) COL_HDR_DECLARE(prof_live_requested); COL_HDR_DECLARE(prof_live_count); From 323ed2e3a8c88c7db89b4119b10192af4303d29c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2024 15:08:24 -0700 Subject: [PATCH 215/395] Optimize fast path to allow static size class computation. After inlining at LTO time, many callsites have input size known which means the index and usable size can be translated at compile time. However the size-index lookup table prevents it -- this commit solves that by switching to the compute approach when the size is detected to be a known const. 
--- .../internal/jemalloc_internal_inlines_c.h | 5 +-- include/jemalloc/internal/sz.h | 33 +++++++++++++++---- include/jemalloc/internal/util.h | 10 ++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 6dcffac9..432ec17c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -496,6 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); emap_alloc_ctx_t alloc_ctx; + size_t usize; if (!size_hint) { bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); @@ -507,6 +508,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } assert(alloc_ctx.szind != SC_NSIZES); + usize = sz_index2size(alloc_ctx.szind); } else { /* * Check for both sizes that are too large, and for sampled / @@ -518,7 +520,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* check_prof */ true))) { return false; } - alloc_ctx.szind = sz_size2index_lookup(size); + sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize); /* Max lookup class must be small. */ assert(alloc_ctx.szind < SC_NBINS); /* This is a dead store, except when opt size checking is on. 
*/ @@ -534,7 +536,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold); - size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 955d8ec0..a2d2debc 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -152,8 +152,8 @@ sz_psz2u(size_t psz) { return usize; } -static inline szind_t -sz_size2index_compute(size_t size) { +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_compute_inline(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { return SC_NSIZES; } @@ -186,6 +186,11 @@ sz_size2index_compute(size_t size) { } } +static inline szind_t +sz_size2index_compute(size_t size) { + return sz_size2index_compute_inline(size); +} + JEMALLOC_ALWAYS_INLINE szind_t sz_size2index_lookup_impl(size_t size) { assert(size <= SC_LOOKUP_MAXCLASS); @@ -208,8 +213,8 @@ sz_size2index(size_t size) { return sz_size2index_compute(size); } -static inline size_t -sz_index2size_compute(szind_t index) { +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_compute_inline(szind_t index) { #if (SC_NTINY > 0) if (index < SC_NTINY) { return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index)); @@ -234,6 +239,11 @@ sz_index2size_compute(szind_t index) { } } +static inline size_t +sz_index2size_compute(szind_t index) { + return sz_index2size_compute_inline(index); +} + JEMALLOC_ALWAYS_INLINE size_t sz_index2size_lookup_impl(szind_t index) { return sz_index2size_tab[index]; @@ -254,8 +264,19 @@ sz_index2size(szind_t index) { JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { - *ind = sz_size2index_lookup_impl(size); - *usize = sz_index2size_lookup_impl(*ind); + if (util_compile_time_const(size)) { + /* + * When inlined, the size may 
become known at compile + * time, which allows static computation through LTO. + */ + *ind = sz_size2index_compute_inline(size); + assert(*ind == sz_size2index_lookup_impl(size)); + *usize = sz_index2size_compute_inline(*ind); + assert(*usize == sz_index2size_lookup_impl(*ind)); + } else { + *ind = sz_size2index_lookup_impl(size); + *usize = sz_index2size_lookup_impl(*ind); + } } JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index f4035095..24f23629 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -79,6 +79,16 @@ get_errno(void) { } while(0) #endif +/* Allows compiler constant folding on inlined paths. */ +#if defined(__has_builtin) +# if __has_builtin(__builtin_constant_p) +# define util_compile_time_const(x) __builtin_constant_p(x) +# endif +#endif +#ifndef util_compile_time_const +# define util_compile_time_const(x) (false) +#endif + /* ptr should be valid. */ JEMALLOC_ALWAYS_INLINE void util_prefetch_read(void *ptr) { From 60f472f367121d7d4933d0237ff38276f565fc88 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Thu, 12 Sep 2024 09:03:30 -0700 Subject: [PATCH 216/395] Fix initialization of pop_attempt_results in bin_batching test --- test/unit/bin_batching.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index 19975341..a20062fd 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -118,7 +118,8 @@ stress_run(void (*main_thread_fn)(), int nruns) { bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[2], 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); atomic_store_zu(&dalloc_nonzero_slab_count, 0, 
ATOMIC_RELAXED); atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); From 8c2e15d1a5749e50a1f61e216bb5fefc0d71d9b0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Tue, 10 Sep 2024 13:29:35 -0700 Subject: [PATCH 217/395] Add malloc_open() / malloc_close() reentrancy safe helpers --- include/jemalloc/internal/malloc_io.h | 21 +++++++++ src/pages.c | 61 +++++---------------------- 2 files changed, 32 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 91e7b2ba..bb43fb5c 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -134,4 +134,25 @@ malloc_read_fd(int fd, void *buf, size_t count) { return bytes_read; } +static inline int malloc_open(const char *path, int flags) { + int fd; +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, path, flags); +#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) + fd = (int)syscall(SYS_openat, AT_FDCWD, path, flags); +#else + fd = open(path, flags); +#endif + return fd; +} + +static inline int malloc_close(int fd) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + return (int)syscall(SYS_close, fd); +#else + return close(fd); +#endif +} + + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/pages.c b/src/pages.c index 981aae9b..5b55a046 100644 --- a/src/pages.c +++ b/src/pages.c @@ -651,36 +651,13 @@ os_overcommits_proc(void) { int fd; char buf[1]; -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - #if defined(O_CLOEXEC) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); - #else - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif -#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - #if defined(O_CLOEXEC) - fd = (int)syscall(SYS_openat, - AT_FDCWD, 
"/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); - #else - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif +#if defined(O_CLOEXEC) + fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #else - #if defined(O_CLOEXEC) - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); - #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif + fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif if (fd == -1) { @@ -688,11 +665,7 @@ os_overcommits_proc(void) { } ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) - syscall(SYS_close, fd); -#else - close(fd); -#endif + malloc_close(fd); if (nread < 1) { return false; /* Error. 
*/ @@ -741,29 +714,17 @@ init_thp_state(void) { static const char sys_state_never[] = "always madvise [never]\n"; char buf[sizeof(sys_state_madvise)]; -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - int fd = (int)syscall(SYS_open, + int fd = malloc_open( "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - int fd = (int)syscall(SYS_openat, - AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#else - int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#endif if (fd == -1) { goto label_error; } ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) - syscall(SYS_close, fd); -#else - close(fd); -#endif - - if (nread < 0) { + malloc_close(fd); + if (nread < 0) { goto label_error; - } + } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { init_system_thp_mode = thp_mode_default; From db4f0e71820017039f09e5acc04b554826e304fd Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 27 Aug 2024 14:26:57 -0700 Subject: [PATCH 218/395] Add travis tests for arm64. 
--- .travis.yml | 36 ++++++++++++++++++++++++++++++++++++ scripts/gen_travis.py | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/.travis.yml b/.travis.yml index 20605fc0..aad7eea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -271,6 +271,42 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: osx arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index d2fd25e3..ae0b9e2e 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -246,6 +246,9 @@ def generate_linux(arch): if arch == PPC64LE: # Avoid 32 bit builds and clang on PowerPC exclude = (CROSS_COMPILE_32BIT, CLANG,) + if arch == ARM64: + # Avoid 32 bit build on ARM64 + exclude = (CROSS_COMPILE_32BIT,) return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -318,6 +321,7 @@ def main(): generate_linux(AMD64), # PPC tests on travis has been down for a while, disable it for now. # generate_linux(PPC64LE), + generate_linux(ARM64), generate_macos(AMD64), From 661fb1e6722e9b29e76520182086edcb835077e3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Sep 2024 23:48:55 -0700 Subject: [PATCH 219/395] Fix the locked flag for malloc_mutex_trylock(). --- include/jemalloc/internal/mutex.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 75abf298..9f72cb18 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -36,6 +36,8 @@ struct malloc_mutex_s { * Hint flag to avoid exclusive cache line contention * during spin waiting. Placed along with prof_data * since it's always modified even with no contention. + * Modified by the lock owner only (after acquired, and + * before release), and may be read by other threads. 
*/ atomic_b_t locked; #ifdef _WIN32 @@ -156,7 +158,12 @@ malloc_mutex_lock_final(malloc_mutex_t *mutex) { static inline bool malloc_mutex_trylock_final(malloc_mutex_t *mutex) { - return MALLOC_MUTEX_TRYLOCK(mutex); + bool failed = MALLOC_MUTEX_TRYLOCK(mutex); + if (!failed) { + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); + } + + return failed; } static inline void @@ -216,7 +223,6 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); - atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); } mutex_owner_stats_update(tsdn, mutex); } From 3eb7a4b53dfeae537fd78cece51342a1f12d86dc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 15:07:27 -0700 Subject: [PATCH 220/395] Fix mutex state tracking around pthread_cond_wait(). pthread_cond_wait drops and re-acquires the mutex internally, w/o going through our wrapper. Update the locked state explicitly. --- src/background_thread.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index c92fa2bc..a5f4dbf7 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -153,6 +153,26 @@ set_current_thread_affinity(int cpu) { /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +static int +background_thread_cond_wait(background_thread_info_t *info, + struct timespec *ts) { + int ret; + + /* + * pthread_cond_wait drops and re-acquires the mutex internally, w/o + * going through our wrapper. Update the locked state explicitly. 
+ */ + atomic_store_b(&info->mtx.locked, false, ATOMIC_RELAXED); + if (ts == NULL) { + ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + } else { + ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, ts); + } + atomic_store_b(&info->mtx.locked, true, ATOMIC_RELAXED); + + return ret; +} + static void background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { @@ -171,7 +191,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { background_thread_wakeup_time_set(tsdn, info, BACKGROUND_THREAD_INDEFINITE_SLEEP); - ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + ret = background_thread_cond_wait(info, NULL); assert(ret == 0); } else { assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && @@ -193,7 +213,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); assert(!background_thread_indefinite_sleep(info)); - ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); + ret = background_thread_cond_wait(info, &ts); assert(ret == ETIMEDOUT || ret == 0); } if (config_stats) { From 1960536b61ba2c1d287cf7866fae02aea3f4e3b0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Sep 2024 23:28:23 -0700 Subject: [PATCH 221/395] Add malloc_mutex_is_locked() sanity checks. --- include/jemalloc/internal/mutex.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 9f72cb18..ceb07b85 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -178,6 +178,12 @@ mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { } } +static inline bool +malloc_mutex_is_locked(malloc_mutex_t *mutex) { + /* Used for sanity checking only. */ + return atomic_load_b(&mutex->locked, ATOMIC_RELAXED); +} + /* Trylock: return false if the lock is successfully acquired. 
*/ static inline bool malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { @@ -186,6 +192,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (malloc_mutex_trylock_final(mutex)) { return true; } + assert(malloc_mutex_is_locked(mutex)); mutex_owner_stats_update(tsdn, mutex); } witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); @@ -224,6 +231,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); } + assert(malloc_mutex_is_locked(mutex)); mutex_owner_stats_update(tsdn, mutex); } witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); @@ -231,9 +239,10 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { + assert(malloc_mutex_is_locked(mutex)); + atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); MALLOC_MUTEX_UNLOCK(mutex); } } From de5606d0d819cbea5c9ef17c989821c1bd7a6697 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 15:39:46 -0700 Subject: [PATCH 222/395] Fix a missing init value warning caught by static analysis. 
--- src/extent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extent.c b/src/extent.c index 2efc7938..30942491 100644 --- a/src/extent.c +++ b/src/extent.c @@ -825,6 +825,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { assert(!edata_guarded_get(edata)); assert(coalesced != NULL); + *coalesced = false; /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can From 734f29ce56a2769857b084a37af09f5846c56a32 Mon Sep 17 00:00:00 2001 From: roblabla Date: Mon, 23 Sep 2024 15:33:43 +0200 Subject: [PATCH 223/395] Fix compilation with MSVC 2022 On MSVC, log is an intrinsic that doesn't require libm. However, AC_SEARCH_LIBS does not successfully detect this, as it will try to compile a program using the wrong signature for log. Newer versions of MSVC CL detects this and rejects the program with the following messages: conftest.c(40): warning C4391: 'char log()': incorrect return type for intrinsic function, expected 'double' conftest.c(44): error C2168: 'log': too few actual parameters for intrinsic function Since log is always available on MSVC (it's been around since the dawn of time), we simply always assume it's there if MSVC is detected. --- configure.ac | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index ff493e1d..dcf357e4 100644 --- a/configure.ac +++ b/configure.ac @@ -878,11 +878,26 @@ AC_SUBST([DUMP_SYMS]) AC_SUBST([CC_MM]) dnl Determine whether libm must be linked to use e.g. log(3). -AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) -if test "x$ac_cv_search_log" != "xnone required" ; then - LM="$ac_cv_search_log" -else + +# On MSVC, log is an intrinsic that doesn't require libm. 
However, +# AC_SEARCH_LIBS does not successfully detect this, as it will try to compile +# a program using the wrong signature for log. Newer versions of MSVC CL detects +# this and rejects the program with the following messages. +# +# conftest.c(40): warning C4391: 'char log()': incorrect return type for intrinsic function, expected 'double' +# conftest.c(44): error C2168: 'log': too few actual parameters for intrinsic function +# +# Since log is always available on MSVC (it's been around since the dawn of +# time), we simply always assume it's there if MSVC is detected. +if test "x$je_cv_msvc" = "xyes" ; then LM= +else + AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) + if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" + else + LM= + fi fi AC_SUBST(LM) From 0181aaa495bc6ef3dcd570ea5d37cb7b72375614 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Sep 2024 15:52:22 -0700 Subject: [PATCH 224/395] Optimize edata_cmp_summary_compare when __uint128_t is available --- configure.ac | 15 +++++++++++++++ include/jemalloc/internal/edata.h | 16 ++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ 3 files changed, 34 insertions(+) diff --git a/configure.ac b/configure.ac index dcf357e4..2e7f610d 100644 --- a/configure.ac +++ b/configure.ac @@ -562,6 +562,21 @@ AC_CACHE_CHECK([asm volatile support], if test "x${je_cv_asm_volatile}" = "xyes"; then AC_DEFINE([JEMALLOC_HAVE_ASM_VOLATILE], [ ], [ ]) fi +AC_CACHE_CHECK([__int128 support], + [je_cv_int128], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +]], +[[ + __int128 temp = 0; + return temp; +]])], +[je_cv_int128=yes], +[je_cv_int128=no], +[je_cv_int128=no])) +if test "x${je_cv_int128}" = "xyes"; then + AC_DEFINE([JEMALLOC_HAVE_INT128], [ ], [ ]) +fi LD_PRELOAD_VAR="LD_PRELOAD" so="so" diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 17befd92..2381ccbc 100644 --- a/include/jemalloc/internal/edata.h +++ 
b/include/jemalloc/internal/edata.h @@ -666,6 +666,21 @@ edata_cmp_summary_get(const edata_t *edata) { return result; } +#ifdef JEMALLOC_HAVE_INT128 +JEMALLOC_ALWAYS_INLINE unsigned __int128 +edata_cmp_summary_encode(edata_cmp_summary_t src) { + return ((unsigned __int128)src.sn << 64) | src.addr; +} + +static inline int +edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { + unsigned __int128 a_encoded = edata_cmp_summary_encode(a); + unsigned __int128 b_encoded = edata_cmp_summary_encode(b); + if (a_encoded < b_encoded) return -1; + if (a_encoded == b_encoded) return 0; + return 1; +} +#else static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { /* @@ -683,6 +698,7 @@ edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + ((a.addr > b.addr) - (a.addr < b.addr)); } +#endif static inline int edata_snad_comp(const edata_t *a, const edata_t *b) { diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7498bc48..7f369873 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -454,6 +454,9 @@ */ #undef JEMALLOC_HAVE_RDTSCP +/* If defined, use __int128 for optimization. */ +#undef JEMALLOC_HAVE_INT128 + #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ From 44db479fad82751a3c6a3157e59b9d295f9ec90f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 20:24:30 -0700 Subject: [PATCH 225/395] Fix the lock owner sanity checking during background thread boot. During boot, some mutexes are not initialized yet, plus there's no point taking many mutexes while everything is covered by the global init lock, so the locking assumptions in some functions (e.g. background_thread_enabled_set()) can't be enforced. Skip the lock owner check in this case. 
--- include/jemalloc/internal/background_thread_inlines.h | 7 ++++++- src/background_thread.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 4ed05d1b..fd3884f1 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -11,10 +11,15 @@ background_thread_enabled(void) { return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); } +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set_impl(bool state) { + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + JEMALLOC_ALWAYS_INLINE void background_thread_enabled_set(tsdn_t *tsdn, bool state) { malloc_mutex_assert_owner(tsdn, &background_thread_lock); - atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); + background_thread_enabled_set_impl(state); } JEMALLOC_ALWAYS_INLINE background_thread_info_t * diff --git a/src/background_thread.c b/src/background_thread.c index a5f4dbf7..30c3111c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -819,7 +819,6 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { } max_background_threads = opt_max_background_threads; - background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, @@ -850,7 +849,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { background_thread_info_init(tsdn, info); malloc_mutex_unlock(tsdn, &info->mtx); } + /* Using _impl to bypass the locking check during init. 
*/ + background_thread_enabled_set_impl(opt_background_thread); #endif - return false; } From 6cc42173cbb2dad6ef5c7e49e6666987ce4cf92c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 20:33:10 -0700 Subject: [PATCH 226/395] Assert the mutex is locked within malloc_mutex_assert_owner(). --- include/jemalloc/internal/mutex.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index ceb07b85..db2bdf37 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -250,6 +250,9 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + assert(malloc_mutex_is_locked(mutex)); + } } static inline void From 4f4fd424477142ee9962fcf4e4cd0349d4e6e4d3 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 29 Aug 2024 10:49:31 -0700 Subject: [PATCH 227/395] Remove `strict_min_purge_interval` option Option `experimental_hpa_strict_min_purge_interval` was expected to be temporary to simplify rollout of a bugfix. Now, when bugfix rollout is complete it is safe to remove this option. --- include/jemalloc/internal/hpa_opts.h | 10 ---------- src/ctl.c | 5 ----- src/hpa.c | 11 +++-------- src/jemalloc.c | 4 ---- src/stats.c | 1 - test/unit/hpa.c | 29 +++++++++++++++++----------- test/unit/mallctl.c | 2 -- 7 files changed, 21 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 15765689..ee2bd40c 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -50,14 +50,6 @@ struct hpa_shard_opts_s { */ uint64_t min_purge_interval_ms; - /* - * Strictly respect minimum amout of time between purges. 
- * - * This is an option to provide backward compatibility for staged rollout of - * purging logic fix. - */ - bool experimental_strict_min_purge_interval; - /* * Maximum number of hugepages to purge on each purging attempt. */ @@ -83,8 +75,6 @@ struct hpa_shard_opts_s { 10 * 1000, \ /* min_purge_interval_ms */ \ 5 * 1000, \ - /* experimental_strict_min_purge_interval */ \ - false, \ /* experimental_max_purge_nhp */ \ -1 \ } diff --git a/src/ctl.c b/src/ctl.c index 8608f124..2a9e47f2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,7 +103,6 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) -CTL_PROTO(opt_experimental_hpa_strict_min_purge_interval) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) @@ -462,8 +461,6 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("experimental_hpa_strict_min_purge_interval"), - CTL(opt_experimental_hpa_strict_min_purge_interval)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, @@ -2202,8 +2199,6 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) -CTL_RO_NL_GEN(opt_experimental_hpa_strict_min_purge_interval, - opt_hpa_opts.experimental_strict_min_purge_interval, bool) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) diff --git a/src/hpa.c b/src/hpa.c index 3d7a6f60..d58a17ec 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -512,14 +512,9 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool 
hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (shard->opts.experimental_strict_min_purge_interval) { - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; - } - } - return true; + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + return since_last_purge_ms >= shard->opts.min_purge_interval_ms; } /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 63f6b302..428a50ef 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1571,10 +1571,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - CONF_HANDLE_BOOL( - opt_hpa_opts.experimental_strict_min_purge_interval, - "experimental_hpa_strict_min_purge_interval"); - CONF_HANDLE_SSIZE_T( opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); diff --git a/src/stats.c b/src/stats.c index ef025eb3..d5be92d3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,7 +1564,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") - OPT_WRITE_BOOL("experimental_hpa_strict_min_purge_interval") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index ae8a976c..747f98ef 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,8 +34,6 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, - /* experimental_strict_min_purge_interval */ - false, /* experimental_max_purge_nhp */ -1 }; @@ -53,8 +51,6 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* 
min_purge_interval_ms */ 5 * 1000, - /* experimental_strict_min_purge_interval */ - false, /* experimental_max_purge_nhp */ -1 }; @@ -506,7 +502,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END -TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { +TEST_BEGIN(test_no_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -520,6 +516,7 @@ TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -547,7 +544,7 @@ TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { } TEST_END -TEST_BEGIN(test_experimental_strict_min_purge_interval) { +TEST_BEGIN(test_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -561,7 +558,6 @@ TEST_BEGIN(test_experimental_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; - opts.experimental_strict_min_purge_interval = true; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -631,6 +627,7 @@ TEST_BEGIN(test_purge) { pai_dalloc(tsdn, &shard->pai, edatas[i], &deferred_work_generated); } + nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -642,9 +639,15 @@ TEST_BEGIN(test_purge) { expect_zu_eq(2, ndefer_purge_calls, "Expect purges"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 12, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + /* + * We are still having 5 active hugepages and now they are + * matching hugification criteria long enough to actually hugify them. 
+ */ + expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below @@ -691,6 +694,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { pai_dalloc(tsdn, &shard->pai, edatas[i], &deferred_work_generated); } + nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -702,14 +706,17 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(1, ndefer_purge_calls, "Expect purges"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 12, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 18, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -741,8 +748,8 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, - test_no_experimental_strict_min_purge_interval, - test_experimental_strict_min_purge_interval, + test_no_min_purge_interval, + test_min_purge_interval, test_purge, test_experimental_max_purge_nhp); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 028a27f7..ffe5c411 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,8 +292,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); - TEST_MALLCTL_OPT(bool, experimental_hpa_strict_min_purge_interval, - always); TEST_MALLCTL_OPT(ssize_t, 
experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From 1c900088c33402cc8bb0ea78dc1338ab6c087e0c Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Thu, 26 Sep 2024 15:40:52 -0700 Subject: [PATCH 228/395] Do not support hpa if HUGEPAGE is too large. --- src/hpa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hpa.c b/src/hpa.c index d58a17ec..d1558821 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -51,6 +51,10 @@ hpa_supported(void) { if (HUGEPAGE_PAGES == 1) { return false; } + /* As mentioned in pages.h, do not support If HUGEPAGE is too large. */ + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + return false; + } return true; } From 3a0d9cdadb8a0dbfd180367459721d13eab0e116 Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Fri, 30 Aug 2024 17:32:42 -0700 Subject: [PATCH 229/395] Use MSVC __declspec(thread) for TSD on Windows --- include/jemalloc/internal/tsd_win.h | 74 ++++++++++++++++++++++++++++- src/tsd.c | 6 ++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 4b40a8ab..559ee78f 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -7,11 +7,17 @@ #include "jemalloc/internal/tsd_internals.h" #include "jemalloc/internal/tsd_types.h" +/* val should always be the first field of tsd_wrapper_t since accessing + val is the common path and having val as the first field makes it possible + that converting a pointer to tsd_wrapper_t to a pointer to val is no more + than a type cast. 
*/ typedef struct { - bool initialized; tsd_t val; + bool initialized; } tsd_wrapper_t; +#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) + extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; extern bool tsd_booted; @@ -165,3 +171,69 @@ tsd_set(tsd_t *val) { } wrapper->initialized = true; } + +#else // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) + +#define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_wrapper_tls.initialized) { + tsd_wrapper_tls.initialized = false; + tsd_cleanup(&tsd_wrapper_tls.val); + if (tsd_wrapper_tls.initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + return false; +} + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + _malloc_tsd_cleanup_register(tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. 
*/ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + return &(tsd_wrapper_tls.val); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&(tsd_wrapper_tls.val) != val)) { + tsd_wrapper_tls.val = (*val); + } + tsd_wrapper_tls.initialized = true; +} + +#endif // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) diff --git a/src/tsd.c b/src/tsd.c index c9ae2d64..0a2ccc59 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -22,8 +22,12 @@ JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) +#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) DWORD tsd_tsd; -tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +tsd_wrapper_t tsd_boot_wrapper = {TSD_INITIALIZER, false}; +#else +JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls = { TSD_INITIALIZER, false }; +#endif bool tsd_booted = false; #if JEMALLOC_WIN32_TLSGETVALUE2 TGV2 tls_get_value2 = NULL; From edc1576f03d15a22b968828b68a074d9be6e5cc0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Mon, 16 Sep 2024 11:00:37 -0700 Subject: [PATCH 230/395] Add safe frame-pointer backtrace unwinder --- INSTALL.md | 7 + Makefile.in | 1 + configure.ac | 28 +++ doc/jemalloc.xml.in | 10 ++ .../internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 7 + include/jemalloc/internal/malloc_io.h | 7 + include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 3 + src/prof_stack_range.c | 161 ++++++++++++++++++ src/prof_sys.c | 42 ++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + 13 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 src/prof_stack_range.c diff --git a/INSTALL.md b/INSTALL.md index f772dd09..7f6137ee 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -139,6 +139,7 @@ any of the following arguments (not a definitive list) to 'configure': in the following list that appears to function correctly: + libunwind (requires 
--enable-prof-libunwind) + + frame pointer (requires --enable-prof-frameptr) + libgcc (unless --disable-prof-libgcc) + gcc intrinsics (unless --disable-prof-gcc) @@ -147,6 +148,12 @@ any of the following arguments (not a definitive list) to 'configure': Use the libunwind library (http://www.nongnu.org/libunwind/) for stack backtracing. +* `--enable-prof-frameptr` + + Use the optimized frame pointer unwinder for stack backtracing. Safe + to use in mixed code (with and without frame pointers) - but requires + frame pointers to produce meaningful stacks. Linux only. + * `--disable-prof-libgcc` Disable the use of libgcc's backtracing functionality. diff --git a/Makefile.in b/Makefile.in index 94208f37..6a386720 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stack_range.c \ $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ $(srcroot)src/psset.c \ diff --git a/configure.ac b/configure.ac index 2e7f610d..4776aa60 100644 --- a/configure.ac +++ b/configure.ac @@ -1448,6 +1448,33 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then fi fi +if test `uname -s` = "Linux" +then + AC_ARG_ENABLE([prof-frameptr], + [AS_HELP_STRING([--enable-prof-frameptr], [Use optimized frame pointer unwinder for backtracing (Linux only)])], + [if test "x$enable_prof_frameptr" = "xno" ; then + enable_prof_frameptr="0" + else + enable_prof_frameptr="1" + if test "x$enable_prof" = "x0" ; then + AC_MSG_ERROR([--enable-prof-frameptr should only be used with --enable-prof]) + fi + fi + ], + [enable_prof_frameptr="0"] + ) + if test "x$backtrace_method" = "x" -a "x$enable_prof_frameptr" = "x1" \ + -a "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-fno-omit-frame-pointer]) + backtrace_method="frame pointer linux" + AC_DEFINE([JEMALLOC_PROF_FRAME_POINTER], [ ], [ ]) + else + enable_prof_frameptr="0" + fi +else + 
enable_prof_frameptr="0" +fi + AC_ARG_ENABLE([prof-libgcc], [AS_HELP_STRING([--disable-prof-libgcc], [Do not use libgcc for backtracing])], @@ -2847,6 +2874,7 @@ AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) +AC_MSG_RESULT([prof-frameptr : ${enable_prof_frameptr}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([fill : ${enable_fill}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 89a176e0..59058bad 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -897,6 +897,16 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", during build configuration. + + + config.prof_frameptr + (bool) + r- + + was specified + during build configuration. + + config.stats diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7f369873..7ad75a06 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -167,6 +167,9 @@ /* Use gcc intrinsics for profile backtracing if defined. */ #undef JEMALLOC_PROF_GCC +/* Use frame pointer for profile backtracing if defined. Linux only. 
*/ +#undef JEMALLOC_PROF_FRAME_POINTER + /* JEMALLOC_PAGEID enabled page id */ #undef JEMALLOC_PAGEID diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 6b55e47f..ebce5d56 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -114,6 +114,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_prof_frameptr = +#ifdef JEMALLOC_PROF_FRAME_POINTER + true +#else + false +#endif + ; static const bool maps_coalesce = #ifdef JEMALLOC_MAPS_COALESCE true diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index bb43fb5c..9c7c6ec2 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -154,5 +154,12 @@ static inline int malloc_close(int fd) { #endif } +static inline off_t malloc_lseek(int fd, off_t offset, int whence) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek) + return (off_t)syscall(SYS_lseek, fd, offset, whence); +#else + return lseek(fd, offset, whence); +#endif +} #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index e6e7f06f..3377ba92 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -20,6 +20,7 @@ void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); +uintptr_t prof_thread_stack_start(uintptr_t stack_end); /* Used in unit tests. 
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); diff --git a/src/ctl.c b/src/ctl.c index 2a9e47f2..690bbabc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -89,6 +89,7 @@ CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_prof_frameptr) CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) @@ -436,6 +437,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -2178,6 +2180,7 @@ CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_prof_frameptr, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c new file mode 100644 index 00000000..c3458044 --- /dev/null +++ b/src/prof_stack_range.c @@ -0,0 +1,161 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof_sys.h" + +#if defined (__linux__) + +#include +#include +#include +#include // strtoul +#include +#include + +static int prof_mapping_containing_addr( + uintptr_t addr, + const char* maps_path, + uintptr_t* mm_start, + uintptr_t* mm_end) { + int ret = ENOENT; // not found + *mm_start = *mm_end = 0; + + // Each line of /proc//maps is: + // - + // + // The fields we care about are always within the first 34 characters so + // as long as `buf` contains the start of a mapping line it can 
always be + // parsed. + static const int kMappingFieldsWidth = 34; + + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; // actual number of bytes read to buf + char* line = NULL; + + while (1) { + if (fd < 0) { + // case 0: initial open of maps file + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else if (line == NULL) { + // case 1: no newline found in buf + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + // case 2: found newline but insufficient characters remaining in buf + + // fd currently points to the character immediately after the last + // character in buf. Seek fd to the character after the newline. + if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) { + ret = errno; + break; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else { + // case 3: found newline and sufficient characters to parse + + // parse - + char* tmp = line; + uintptr_t start_addr = strtoul(tmp, &tmp, 16); + if (addr >= start_addr) { + tmp++; // advance to character after '-' + uintptr_t end_addr = strtoul(tmp, &tmp, 16); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } + + // Advance to character after next newline in the current buf. 
+ char* prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - prev_line); + } + } + } + + malloc_close(fd); + return ret; +} + +static uintptr_t prof_main_thread_stack_start(const char* stat_path) { + uintptr_t stack_start = 0; + + int fd = malloc_open(stat_path, O_RDONLY); + if (fd < 0) { + return 0; + } + + char buf[512]; + ssize_t n = malloc_read_fd(fd, buf, sizeof(buf) - 1); + if (n >= 0) { + buf[n] = '\0'; + if (sscanf( + buf, + "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %"FMTuPTR, + &stack_start) != 1) { + } + } + malloc_close(fd); + return stack_start; +} + +uintptr_t prof_thread_stack_start(uintptr_t stack_end) { + pid_t pid = getpid(); + pid_t tid = gettid(); + if (pid == tid) { + char stat_path[32]; // "/proc//stat" + malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid); + return prof_main_thread_stack_start(stat_path); + } else { + // NOTE: Prior to kernel 4.5 an entry for every thread stack was included in + // /proc//maps as [STACK:]. Starting with kernel 4.5 only the main + // thread stack remains as the [stack] mapping. For other thread stacks the + // mapping is still visible in /proc//task//maps (though not + // labeled as [STACK:tid]). 
+ // https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid); + + uintptr_t mm_start, mm_end; + if (prof_mapping_containing_addr( + stack_end, maps_path, &mm_start, &mm_end) != 0) { + return 0; + } + return mm_end; + } +} + +#else + +uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) { + return 0; +} + +#endif // __linux__ diff --git a/src/prof_sys.c b/src/prof_sys.c index 8a904040..f0bc8b4b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" @@ -98,6 +99,45 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { _Unwind_Backtrace(prof_unwind_callback, &data); } +#elif (defined(JEMALLOC_PROF_FRAME_POINTER)) +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +static void +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { + // stack_start - highest possible valid stack address (assumption: stacks grow downward) + // stack_end - current stack frame and lowest possible valid stack address + // (all earlier frames will be at higher addresses than this) + + // always safe to get the current stack frame address + void** stack_end = (void**)__builtin_frame_address(0); + if (stack_end == NULL) { + *len = 0; + return; + } + + static __thread void **stack_start = (void **)0; // thread local + if (stack_start == 0 || stack_end >= stack_start) { + stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end); + } + + if (stack_start == 0 || stack_end >= stack_start) { + *len = 0; + return; + } + + unsigned ii = 0; + void** fp = (void**)stack_end; + while (fp < stack_start && ii < max_len) { + vec[ii++] = fp[1]; + void** fp_prev = fp; + fp = fp[0]; + if (unlikely(fp 
<= fp_prev)) { // sanity check forward progress + break; + } + } + *len = ii; +} +JEMALLOC_DIAGNOSTIC_POP #elif (defined(JEMALLOC_PROF_GCC)) JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS @@ -484,7 +524,7 @@ prof_getpid(void) { #endif } -long +static long prof_get_pid_namespace() { long ret = 0; diff --git a/src/stats.c b/src/stats.c index d5be92d3..1e607d9e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1467,6 +1467,7 @@ stats_general_print(emitter_t *emitter) { CONFIG_WRITE_BOOL(prof); CONFIG_WRITE_BOOL(prof_libgcc); CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(prof_frameptr); CONFIG_WRITE_BOOL(stats); CONFIG_WRITE_BOOL(utrace); CONFIG_WRITE_BOOL(xmalloc); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ffe5c411..65e84370 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -255,6 +255,7 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(prof, bool); TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(prof_frameptr, bool); TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(xmalloc, bool); From 8c2b8bcf24ec67523d310f46c38730b1d3348b39 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 11:18:43 -0700 Subject: [PATCH 231/395] Update doc to reflect muzzy decay is disabled by default. It has been disabled since 5.2.0 (in #1421). --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 59058bad..2a8573b8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1195,7 +1195,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", purged according to a sigmoidal decay curve that starts and ends with zero purge rate. A decay time of 0 causes all unused muzzy pages to be purged immediately upon creation. A decay time of -1 disables purging. - The default decay time is 10 seconds. 
See arenas.muzzy_decay_ms and arena.<i>.muzzy_decay_ms From 02251c0070969e526cae3dde6d7b2610a4ed87ef Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 11:29:04 -0700 Subject: [PATCH 232/395] Update the configure cache file example in INSTALL.md --- INSTALL.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7f6137ee..2333f13d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -322,13 +322,13 @@ behavior: 'configure' uses this to find programs. In some cases it may be necessary to work around configuration results that do -not match reality. For example, Linux 4.5 added support for the MADV_FREE flag -to madvise(2), which can cause problems if building on a host with MADV_FREE -support and deploying to a target without. To work around this, use a cache -file to override the relevant configuration variable defined in configure.ac, -e.g.: +not match reality. For example, Linux 3.4 added support for the MADV_DONTDUMP +flag to madvise(2), which can cause problems if building on a host with +MADV_DONTDUMP support and deploying to a target without. To work around this, +use a cache file to override the relevant configuration variable defined in +configure.ac, e.g.: - echo "je_cv_madv_free=no" > config.cache && ./configure -C + echo "je_cv_madv_dontdump=no" > config.cache && ./configure -C ## Advanced compilation From 397827a27d0e5092a15812eb421a2762c773920f Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 16 Aug 2024 16:36:57 -0700 Subject: [PATCH 233/395] Updated jeprof with more symbols to filter. 
--- bin/jeprof.in | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 7aff8643..9cae84ed 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2961,6 +2961,20 @@ sub RemoveUninterestingFrames { 'void* newImpl', 'fallbackNewImpl', 'void* fallbackNewImpl', + 'fallback_impl', + 'void* fallback_impl', + 'imalloc', + 'int imalloc', + 'imalloc_body', + 'int imalloc_body', + 'prof_alloc_prep', + 'prof_tctx_t *prof_alloc_prep', + 'prof_backtrace_impl', + 'void prof_backtrace_impl', + 'je_prof_backtrace', + 'void je_prof_backtrace', + 'je_prof_tctx_create', + 'prof_tctx_t* prof_tctx_create', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', @@ -2969,8 +2983,12 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@valloc', '@JEMALLOC_PREFIX@realloc', '@JEMALLOC_PREFIX@mallocx', + 'irallocx_prof', + 'void *irallocx_prof', '@JEMALLOC_PREFIX@rallocx', 'do_rallocx', + 'ixallocx_prof', + 'size_t ixallocx_prof', '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', @@ -3083,6 +3101,8 @@ sub RemoveUninterestingFrames { foreach my $a (@addrs) { if (exists($symbols->{$a})) { my $func = $symbols->{$a}->[0]; + # Remove suffix in the symbols following space when filtering. + $func =~ s/ .*//; if ($skip{$func} || ($func =~ m/$skip_regexp/)) { # Throw away the portion of the backtrace seen so far, under the # assumption that previous frames were for functions internal to the From 6d625d5e5e06b5a07ab90c37ef6b03b55ca1c00a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 15:25:54 -0700 Subject: [PATCH 234/395] Add support for clock_gettime_nsec_np() Prefer clock_gettime_nsec_np(CLOCK_UPTIME_RAW) to mach_absolute_time(). 
--- configure.ac | 10 ++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ src/nstime.c | 13 ++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4776aa60..9698997a 100644 --- a/configure.ac +++ b/configure.ac @@ -2126,6 +2126,16 @@ if test "x${je_cv_clock_realtime}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ]) fi +dnl Check for clock_gettime_nsec_np(). +JE_COMPILABLE([clock_gettime_nsec_np()], [ +#include +], [ + clock_gettime_nsec_np(CLOCK_UPTIME_RAW); +], [je_cv_clock_gettime_nsec_np]) +if test "x${je_cv_clock_gettime_nsec_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP], [ ], [ ]) +fi + dnl Use syscall(2) (if available) by default. AC_ARG_ENABLE([syscall], [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7ad75a06..f5b1a924 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -117,6 +117,11 @@ */ #undef JEMALLOC_HAVE_CLOCK_REALTIME +/* + * Defined if clock_gettime_nsec_np(CLOCK_UPTIME_RAW) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP + /* * Defined if _malloc_thread_cleanup() exists. 
At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/src/nstime.c b/src/nstime.c index 72f04227..15c53330 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -201,11 +201,22 @@ nstime_get(nstime_t *time) { clock_gettime(CLOCK_MONOTONIC, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } +#elif defined(JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP) +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) { + nstime_init(time, clock_gettime_nsec_np(CLOCK_UPTIME_RAW)); +} #elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME) # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { - nstime_init(time, mach_absolute_time()); + static mach_timebase_info_data_t sTimebaseInfo; + if (sTimebaseInfo.denom == 0) { + (void) mach_timebase_info(&sTimebaseInfo); + } + nstime_init(time, mach_absolute_time() * sTimebaseInfo.numer + / sTimebaseInfo.denom); } #else # define NSTIME_MONOTONIC false From 2a693b83d2d1631b6a856d178125e1c47c12add9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Oct 2024 10:28:50 -0700 Subject: [PATCH 235/395] Fix the sized-dealloc safety check abort msg. 
--- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 432ec17c..854aec1e 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,8 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_size2index(dbg_ctx.szind), - /* input_size */ sz_size2index(alloc_ctx->szind)); + /* true_size */ sz_index2size(dbg_ctx.szind), + /* input_size */ sz_index2size(alloc_ctx->szind)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { From b9758afff037fb074a440bb5590ed113cad78bd3 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 25 Oct 2024 09:47:43 -0700 Subject: [PATCH 236/395] Add `nstime_ms_since` to get time since in ms Milliseconds are used a lot in hpa, so it is convenient to have `nstime_ms_since` function instead of dividing to `MILLION` constantly. For consistency renamed `nstime_msec` to `nstime_ms` as `ms` abbreviation is used much more commonly across codebase than `msec`. ``` $ grep -Rn '_msec' include src | wc -l 2 $ grep -RPn '_ms( |,|:)' include src | wc -l 72 ``` Function `nstime_msec` wasn't used anywhere in the code yet. 
--- include/jemalloc/internal/nstime.h | 3 ++- src/hpa_hooks.c | 2 +- src/nstime.c | 10 ++++++++-- test/unit/nstime.c | 19 +++++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 440a4d15..1f32df58 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -26,8 +26,8 @@ static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_ms(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_msec(const nstime_t *time); uint64_t nstime_nsec(const nstime_t *time); void nstime_copy(nstime_t *time, const nstime_t *source); int nstime_compare(const nstime_t *a, const nstime_t *b); @@ -39,6 +39,7 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); uint64_t nstime_ns_since(const nstime_t *past); +uint64_t nstime_ms_since(const nstime_t *past); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 6048f382..f43f05eb 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -59,5 +59,5 @@ hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime) { - return nstime_ns_since(past_nstime) / 1000 / 1000; + return nstime_ms_since(past_nstime); } diff --git a/src/nstime.c b/src/nstime.c index 15c53330..894753aa 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -63,7 +63,7 @@ nstime_ns(const nstime_t *time) { } uint64_t -nstime_msec(const nstime_t *time) { +nstime_ms(const nstime_t *time) { nstime_assert_initialized(time); return time->ns / MILLION; } @@ -158,7 +158,7 @@ 
nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } -/* Returns time since *past, w/o updating *past. */ +/* Returns time since *past in nanoseconds, w/o updating *past. */ uint64_t nstime_ns_since(const nstime_t *past) { nstime_assert_initialized(past); @@ -171,6 +171,12 @@ nstime_ns_since(const nstime_t *past) { return now.ns - past->ns; } +/* Returns time since *past in milliseconds, w/o updating *past. */ +uint64_t +nstime_ms_since(const nstime_t *past) { + return nstime_ns_since(past) / MILLION; +} + #ifdef _WIN32 # define NSTIME_MONOTONIC false static void diff --git a/test/unit/nstime.c b/test/unit/nstime.c index e7e11e61..43fd3954 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -228,6 +228,24 @@ TEST_BEGIN(test_nstime_ns_since) { } TEST_END +TEST_BEGIN(test_nstime_ms_since) { + nstime_t delta; + + nstime_init2(&delta, /* sec */ 1, /* nsec */ 0); + for (uint64_t i = 0; i < 10000; i++) { + nstime_t now; + nstime_init_update(&now); + + nstime_t past; + nstime_copy(&past, &now); + nstime_subtract(&past, &delta); + + expect_u64_ge(nstime_ms_since(&past), nstime_ms(&delta), + "Incorrect time since result"); + } +} +TEST_END + TEST_BEGIN(test_nstime_monotonic) { nstime_monotonic(); } @@ -248,5 +266,6 @@ main(void) { test_nstime_idivide, test_nstime_divide, test_nstime_ns_since, + test_nstime_ms_since, test_nstime_monotonic); } From b82333fdec6e5833f88780fcf1fc50b799268e1b Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 25 Oct 2024 10:27:25 -0700 Subject: [PATCH 237/395] Split `stats_arena_hpa_shard_print` function Make multiple functions from `stats_arena_hpa_shard_print` for readability and ease of change in the future. 
--- src/stats.c | 60 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1e607d9e..89dd1916 100644 --- a/src/stats.c +++ b/src/stats.c @@ -831,12 +831,16 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { - emitter_row_t header_row; - emitter_row_init(&header_row); - emitter_row_t row; - emitter_row_init(&row); +stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { + size_t sec_bytes; + CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); +} +static void +stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, + uint64_t uptime) { uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; @@ -851,21 +855,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); - size_t npageslabs_huge; - size_t nactive_huge; - size_t ndirty_huge; - - size_t npageslabs_nonhuge; - size_t nactive_nonhuge; - size_t ndirty_nonhuge; - size_t nretained_nonhuge; - - size_t sec_bytes; - CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); - emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", - emitter_type_size, &sec_bytes); - - /* First, global stats. 
*/ emitter_table_printf(emitter, "HPA shard stats:\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" @@ -878,7 +867,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nhugifies, rate_per_second(nhugifies, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); - emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, @@ -887,8 +875,25 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nhugifies); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); +} - /* Next, full slab stats. */ +static void +stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + size_t npageslabs_huge; + size_t nactive_huge; + size_t ndirty_huge; + + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ndirty_nonhuge; + size_t nretained_nonhuge; + + /* Full slab stats. 
*/ CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -1049,12 +1054,21 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ - emitter_json_object_end(emitter); /* End "hpa_shard" */ if (in_gap) { emitter_table_printf(emitter, " ---\n"); } } +static void +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { + stats_arena_hpa_shard_sec_print(emitter, i); + + emitter_json_object_kv_begin(emitter, "hpa_shard"); + stats_arena_hpa_shard_counters_print(emitter, i, uptime); + stats_arena_hpa_shard_slabs_print(emitter, i); + emitter_json_object_end(emitter); /* End "hpa_shard" */ +} + static void stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; From a361e886e2ec23513e374abc1e4e0429cc93ec5c Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 7 Nov 2024 10:16:46 -0800 Subject: [PATCH 238/395] Move `je_cv_thp` logic closer to definition --- configure.ac | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 9698997a..d037fed5 100644 --- a/configure.ac +++ b/configure.ac @@ -2471,6 +2471,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + case "${host_cpu}" in + arm*) + ;; + *) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) + fi + ;; + esac + dnl Check for madvise(..., MADV_[NO]CORE). 
JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [ #include @@ -2481,15 +2491,6 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_nocore}" = "xyes" ; then AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi -case "${host_cpu}" in - arm*) - ;; - *) - if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) - fi - ;; -esac else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ From 0ce13c6fb5ae3bd837f5a7314bd580070bb408da Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 31 Oct 2024 11:43:11 -0700 Subject: [PATCH 239/395] Add opt `hpa_hugify_sync` to hugify synchronously Linux 6.1 introduced `MADV_COLLAPSE` flag to perform a best-effort synchronous collapse of the native pages mapped by the memory range into transparent huge pages. Synchronous hugification might be beneficial for at least two reasons: we are not relying on khugepaged anymore and get instant feedback if the range wasn't hugified. If the `hpa_hugify_sync` option is on, we'll try to perform a synchronous collapse and if it wasn't successful, we'll fall back to asynchronous behaviour. 
--- configure.ac | 10 ++++++++ include/jemalloc/internal/hpa.h | 8 ++++++ include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/hpa_opts.h | 7 ++++++ .../internal/jemalloc_internal_defs.h.in | 7 ++++++ .../jemalloc/internal/jemalloc_preamble.h.in | 9 +++++++ include/jemalloc/internal/pages.h | 1 + src/ctl.c | 9 +++++++ src/hpa.c | 17 ++++++++++++- src/hpa_hooks.c | 25 ++++++++++++++++--- src/jemalloc.c | 12 +++++++++ src/pages.c | 24 ++++++++++++++++++ src/stats.c | 8 ++++++ test/unit/hpa.c | 9 +++++-- test/unit/mallctl.c | 1 + 15 files changed, 141 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index d037fed5..a330e33e 100644 --- a/configure.ac +++ b/configure.ac @@ -2491,6 +2491,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_nocore}" = "xyes" ; then AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi + + dnl Check for madvise(..., MADV_COLLAPSE). + JE_COMPILABLE([madvise(..., MADV_COLLAPSE)], [ +#include +], [ + madvise((void *)0, 0, MADV_COLLAPSE); +], [je_cv_madv_collapse]) + if test "x${je_cv_madv_collapse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ]) + fi else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 4805efaf..4c410c40 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -61,6 +61,14 @@ struct hpa_shard_nonderived_stats_s { * Guarded by mtx. */ uint64_t nhugifies; + + /* + * The number of times we've tried to hugify a pageslab, but failed. + * + * Guarded by mtx. + */ + uint64_t nhugify_failures; + /* * The number of times we've dehugified a pageslab. 
* diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 72f3a43c..b04b04f6 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -9,7 +9,7 @@ struct hpa_hooks_s { void *(*map)(size_t size); void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); - void (*hugify)(void *ptr, size_t size); + bool (*hugify)(void *ptr, size_t size, bool sync); void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ee2bd40c..42246172 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -45,6 +45,11 @@ struct hpa_shard_opts_s { */ uint64_t hugify_delay_ms; + /* + * Hugify pages synchronously. + */ + bool hugify_sync; + /* * Minimum amount of time between purges. */ @@ -73,6 +78,8 @@ struct hpa_shard_opts_s { false, \ /* hugify_delay_ms */ \ 10 * 1000, \ + /* hugify_sync */ \ + false, \ /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index f5b1a924..5cf77f47 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -308,6 +308,13 @@ */ #undef JEMALLOC_HAVE_MADVISE_HUGE +/* + * Defined if best-effort synchronous collapse of the native + * pages mapped by the memory range into transparent huge pages is supported + * via MADV_COLLAPSE arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_COLLAPSE + /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ebce5d56..a59c3489 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -57,6 +57,15 @@ # define JEMALLOC_MADV_FREE 8 #endif +/* + * Can be defined at compile time, in cases, when it is known + * madvise(..., MADV_COLLAPSE) feature is supported, but MADV_COLLAPSE + * constant is not defined. + */ +#ifdef JEMALLOC_DEFINE_MADVISE_COLLAPSE +# define JEMALLOC_MADV_COLLAPSE 25 +#endif + static const bool config_debug = #ifdef JEMALLOC_DEBUG true diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 6c295b43..0dcf96dc 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -123,6 +123,7 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_collapse(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); diff --git a/src/ctl.c b/src/ctl.c index 690bbabc..40e75fb7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,6 +103,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) +CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) @@ -263,6 +264,7 @@ INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) +CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) /* We have a set of stats for full slabs. 
*/ @@ -462,6 +464,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, @@ -834,6 +837,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("nhugify_failures"), + CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} }; @@ -2200,6 +2205,7 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, @@ -4061,6 +4067,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugify_failures, + uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); diff --git a/src/hpa.c b/src/hpa.c index d1558821..14541413 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -210,6 +210,7 @@ 
hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; + shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; /* @@ -242,6 +243,7 @@ hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; + dst->nhugify_failures += src->nhugify_failures; dst->ndehugifies += src->ndehugifies; } @@ -499,10 +501,23 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); + bool err = shard->central->hooks.hugify(hpdata_addr_get(to_hugify), + HUGEPAGE, shard->opts.hugify_sync); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; + if (err) { + /* + * When asynchronous hugification is used + * (shard->opts.hugify_sync option is false), we are not + * expecting to get here, unless something went terribly wrong. + * Because underlying syscall is only setting kernel flag for + * memory range (actual hugification happens asynchronously + * and we are not getting any feedback about its outcome), we + * expect syscall to be successful all the time. 
+ */ + shard->stats.nhugify_failures++; + } psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index f43f05eb..4628c14f 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -6,7 +6,7 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); -static void hpa_hooks_hugify(void *ptr, size_t size); +static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); @@ -37,10 +37,27 @@ hpa_hooks_purge(void *ptr, size_t size) { pages_purge_forced(ptr, size); } -static void -hpa_hooks_hugify(void *ptr, size_t size) { +static bool +hpa_hooks_hugify(void *ptr, size_t size, bool sync) { + /* + * We mark memory range as huge independently of which hugification + * technique is used (synchronous or asynchronous) to have correct + * VmFlags set for introspection and accounting purposes. If + * synchronous hugification is enabled and pages_collapse call fails, + * then we hope memory range will be hugified asynchronously by + * khugepaged eventually. Right now, 3 out of 4 error return codes of + * madvise(..., MADV_COLLAPSE) are retryable. Instead of retrying, we + * just fall back to asynchronous khugepaged hugification to simplify + * implementation, even if we might know khugepaged fallback will not + * be successful (current madvise(..., MADV_COLLAPSE) implementation + * hints, when EINVAL is returned it is likely that khugepaged won't be + * able to collapse memory range into hugepage either). 
+ */ bool err = pages_huge(ptr, size); - (void)err; + if (sync) { + err = pages_collapse(ptr, size); + } + return err; } static void diff --git a/src/jemalloc.c b/src/jemalloc.c index 428a50ef..248de28b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1093,6 +1093,15 @@ validate_hpa_settings(void) { if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { had_conf_error = true; } +#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE + if (opt_hpa_opts.hugify_sync) { + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); + } +#endif } static void @@ -1566,6 +1575,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_BOOL( + opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); + CONF_HANDLE_UINT64_T( opt_hpa_opts.min_purge_interval_ms, "hpa_min_purge_interval_ms", 0, 0, diff --git a/src/pages.c b/src/pages.c index 5b55a046..26fd8d5d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -567,6 +567,30 @@ pages_nohuge_unaligned(void *addr, size_t size) { return pages_nohuge_impl(addr, size, false); } +bool +pages_collapse(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + /* + * There is one more MADV_COLLAPSE precondition that is not easy to + * express with assert statement. In order for the madvise(addr, size, + * MADV_COLLAPSE) call to be successful, at least one page in the range + * must currently be backed by physical memory. In particular, this + * means we can't call pages_collapse on freshly mapped memory region. + * See madvise(2) man page for more details. 
+ */ +#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) && \ + (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) +# if defined(MADV_COLLAPSE) + return (madvise(addr, size, MADV_COLLAPSE) != 0); +# elif defined(JEMALLOC_MADV_COLLAPSE) + return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0); +# endif +#else + return true; +#endif +} + bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); diff --git a/src/stats.c b/src/stats.c index 89dd1916..7fbaa5cc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -844,6 +844,7 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; + uint64_t nhugify_failures; uint64_t ndehugifies; CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", @@ -852,6 +853,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, i, &npurges, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", + i, &nhugify_failures, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); @@ -860,11 +863,13 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" "\n", npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), + nhugify_failures, rate_per_second(nhugify_failures, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, @@ -873,6 +878,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, &npurges); emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, &nhugifies); + 
emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64, + &nhugify_failures); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); } @@ -1578,6 +1585,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") + OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 747f98ef..50b96a87 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -32,6 +32,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { false, /* hugify_delay_ms */ 10 * 1000, + /* hugify_sync */ + false, /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ @@ -49,6 +51,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { true, /* hugify_delay_ms */ 0, + /* hugify_sync */ + false, /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ @@ -371,9 +375,10 @@ defer_test_purge(void *ptr, size_t size) { } static size_t ndefer_hugify_calls = 0; -static void -defer_test_hugify(void *ptr, size_t size) { +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { ++ndefer_hugify_calls; + return false; } static size_t ndefer_dehugify_calls = 0; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 65e84370..8c8fb18c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -288,6 +288,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(bool, hpa_hugify_sync, always); TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); From 3820e38dc1021cebba4628e277cde060e840aaef Mon 
Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 15 Nov 2024 08:53:20 -0800 Subject: [PATCH 240/395] Remove validation for HPA ratios Config validation was introduced at 3aae792b with main intention to fix infinite purging loop, but it didn't actually fix the underlying problem, just masked it. Later 47d69b4ea was merged to address the same problem. Options `hpa_dirty_mult` and `hpa_hugification_threshold` have different application dimensions: `hpa_dirty_mult` applied to active memory on the shard, but `hpa_hugification_threshold` is a threshold for single pageslab (hugepage). It doesn't make much sense to sum them up together. While it is true that too high value of `hpa_dirty_mult` and too low value of `hpa_hugification_threshold` can lead to pathological behaviour, it is true for other options as well. Poor configurations might lead to suboptimal and sometimes completely unacceptable behaviour and that's OK, that is exactly the reason why they are called poor. Other mechanisms exist to prevent extreme behaviour, when we hugified and then immediately purged a page, see `hpa_hugify_blocked_by_ndirty` function, which exists to prevent exactly this case. Lastly, `hpa_dirty_mult + hpa_hugification_threshold >= 1` constraint is too tight and prevents a lot of valid configurations. 
--- Makefile.in | 1 - src/jemalloc.c | 41 ---------------------- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_validate_conf.c | 56 ------------------------------ test/unit/hpa_validate_conf.sh | 3 -- 5 files changed, 1 insertion(+), 102 deletions(-) delete mode 100644 test/unit/hpa_validate_conf.c delete mode 100644 test/unit/hpa_validate_conf.sh diff --git a/Makefile.in b/Makefile.in index 6a386720..27eb90d3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -230,7 +230,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ - $(srcroot)test/unit/hpa_validate_conf.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 248de28b..67be7681 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1041,44 +1041,6 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { return ret; } -static bool -validate_hpa_ratios(void) { - size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + - opt_hpa_opts.hugification_threshold; - if (hpa_threshold > HUGEPAGE) { - return false; - } - - char hpa_dirty_mult[FXP_BUF_SIZE]; - char hugification_threshold[FXP_BUF_SIZE]; - char normalization_message[256] = {0}; - fxp_print(opt_hpa_opts.dirty_mult, hpa_dirty_mult); - fxp_print(fxp_div(FXP_INIT_INT((unsigned) - (opt_hpa_opts.hugification_threshold >> LG_PAGE)), - FXP_INIT_INT(HUGEPAGE_PAGES)), hugification_threshold); - if (!opt_abort_conf) { - char normalized_hugification_threshold[FXP_BUF_SIZE]; - opt_hpa_opts.hugification_threshold += - HUGEPAGE - hpa_threshold; - fxp_print(fxp_div(FXP_INIT_INT((unsigned) - (opt_hpa_opts.hugification_threshold >> LG_PAGE)), - FXP_INIT_INT(HUGEPAGE_PAGES)), - normalized_hugification_threshold); - malloc_snprintf(normalization_message, - sizeof(normalization_message), ": Normalizing " - "HPA settings to avoid pathological behavior, setting " - 
"hpa_hugification_threshold_ratio: to %s.\n", - normalized_hugification_threshold); - } - malloc_printf( - ": Invalid combination of options " - "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " - "These values should sum to > 1.0.\n%s", hugification_threshold, - hpa_dirty_mult, normalization_message); - - return true; -} - static void validate_hpa_settings(void) { if (!hpa_supported() || !opt_hpa) { @@ -1090,9 +1052,6 @@ validate_hpa_settings(void) { ": huge page size (%zu) greater than expected." "May not be supported or behave as expected.", HUGEPAGE); } - if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { - had_conf_error = true; - } #ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE if (opt_hpa_opts.hugify_sync) { had_conf_error = true; diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 33b70e19..65a56a08 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/hpa_validate_conf.c b/test/unit/hpa_validate_conf.c deleted file mode 100644 index 8c1847ba..00000000 --- a/test/unit/hpa_validate_conf.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "test/jemalloc_test.h" - -static bool abort_called = false; -static void (*default_malloc_message)(void *, const char *); - -static void -mock_invalid_conf_abort(void) { - abort_called = true; -} - -static void -null_malloc_message(void *_1, const char* _2) { -} - -TEST_BEGIN(test_hpa_validate_conf) { - test_skip_if(!hpa_supported()); - void *ptr = malloc(4096); - /* Need to restore this here to see any possible assert messages */ - malloc_message = default_malloc_message; - assert_true(abort_called, - "Should have aborted due to invalid values for hpa_dirty_mult and " - 
"hpa_hugification_threshold_ratio"); - free(ptr); -} -TEST_END - -/* - * We have to set `abort_conf:true` here and not via the `MALLOC_CONF` - * environment variable in the associated shell script for this test. This is - * because when testing on FreeBSD (where Jemalloc is the system allocator) in - * CI configs where HPA is not supported, setting `abort_conf:true` there would - * result in the system Jemalloc picking this up and aborting before we could - * ever even launch the test. - */ -const char *malloc_conf = "abort_conf:true"; - -int -main(void) { - /* - * OK, this is a sort of nasty hack. We don't want to add *another* - * config option for HPA (the intent is that it becomes available on - * more platforms over time, and we're trying to prune back config - * options generally. But we'll get initialization errors on other - * platforms if we set hpa:true in the MALLOC_CONF (even if we set - * abort_conf:false as well). So we reach into the internals and set - * them directly, but only if we know that we're actually going to do - * something nontrivial in the tests. - */ - if (hpa_supported()) { - default_malloc_message = malloc_message; - malloc_message = null_malloc_message; - opt_hpa = true; - invalid_conf_abort = mock_invalid_conf_abort; - } - return test_no_reentrancy(test_hpa_validate_conf); -} diff --git a/test/unit/hpa_validate_conf.sh b/test/unit/hpa_validate_conf.sh deleted file mode 100644 index 692c3da9..00000000 --- a/test/unit/hpa_validate_conf.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF='tcache:false,hpa_dirty_mult:0.25,hpa_hugification_threshold_ratio:0.6' From 6092c980a6d02b34bc7b3ed0c2ad923d0a5d2970 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 14 Nov 2024 10:52:50 -0800 Subject: [PATCH 241/395] Expose `psset` state stats When evaluating changes in HPA logic, it is useful to know internal `hpa_shard` state. Great deal of this state is `psset`. 
Some of the `psset` stats was available, but in disaggregated form, which is not very convenient. This commit exposed `psset` counters to `mallctl` and malloc stats dumps. Example of how malloc stats dump will look like after the change. HPA shard stats: Pageslabs: 14899 (4354 huge, 10545 nonhuge) Active pages: 6708166 (2228917 huge, 4479249 nonhuge) Dirty pages: 233816 (331 huge, 233485 nonhuge) Retained pages: 686306 Purge passes: 8730 (10 / sec) Purges: 127501 (146 / sec) Hugeifies: 4358 (5 / sec) Dehugifies: 4 (0 / sec) Pageslabs, active pages, dirty pages and retained pages are rows added by this change. --- include/jemalloc/internal/psset.h | 39 ++++--- src/ctl.c | 75 +++++++++++-- src/psset.c | 105 ++++++++++++------- src/stats.c | 69 ++++++++++++ test/unit/mallctl.c | 59 +++++++++++ test/unit/psset.c | 169 +++++++++++++++++++++++++++++- 6 files changed, 458 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7e510b7f..ea608213 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -21,6 +21,12 @@ */ #define PSSET_NPSIZES 64 +/* + * We store non-hugefied and hugified pageslabs metadata separately. + * [0] corresponds to non-hugified and [1] to hugified pageslabs. + */ +#define PSSET_NHUGE 2 + /* * We keep two purge lists per page size class; one for hugified hpdatas (at * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + @@ -44,21 +50,36 @@ struct psset_bin_stats_s { typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { + /* + * Merged stats for all pageslabs in psset. This lets us quickly + * answer queries for the number of dirty and active pages in the + * entire set. + */ + psset_bin_stats_t merged; + + /* + * Below are the same stats, but aggregated by different + * properties of pageslabs: huginess or fullness. + */ + + /* Non-huge and huge slabs. 
*/ + psset_bin_stats_t slabs[PSSET_NHUGE]; + /* * The second index is huge stats; nonfull_slabs[pszind][0] contains * stats for the non-huge slabs in bucket pszind, while * nonfull_slabs[pszind][1] contains stats for the huge slabs. */ - psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][PSSET_NHUGE]; /* * Full slabs don't live in any edata heap, but we still track their * stats. */ - psset_bin_stats_t full_slabs[2]; + psset_bin_stats_t full_slabs[PSSET_NHUGE]; /* Empty slabs are similar. */ - psset_bin_stats_t empty_slabs[2]; + psset_bin_stats_t empty_slabs[PSSET_NHUGE]; }; typedef struct psset_s psset_t; @@ -70,12 +91,6 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; - /* - * The sum of all bin stats in stats. This lets us quickly answer - * queries for the number of dirty, active, and retained pages in the - * entire set. 
- */ - psset_bin_stats_t merged_stats; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new @@ -116,17 +131,17 @@ void psset_remove(psset_t *psset, hpdata_t *ps); static inline size_t psset_npageslabs(psset_t *psset) { - return psset->merged_stats.npageslabs; + return psset->stats.merged.npageslabs; } static inline size_t psset_nactive(psset_t *psset) { - return psset->merged_stats.nactive; + return psset->stats.merged.nactive; } static inline size_t psset_ndirty(psset_t *psset) { - return psset->merged_stats.ndirty; + return psset->stats.merged.ndirty; } #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/ctl.c b/src/ctl.c index 40e75fb7..66844105 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -261,13 +261,27 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) + +/* Merged set of stats for HPA shard. */ +CTL_PROTO(stats_arenas_i_hpa_shard_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_ndirty) + CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) -/* We have a set of stats for full slabs. */ +/* Set of stats for non-hugified and hugified slabs. */ +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_huge) + +/* A parallel set of stats for full slabs. 
*/ CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) @@ -295,6 +309,7 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) + CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -771,6 +786,21 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP }; +static const ctl_named_node_t stats_arenas_i_hpa_shard_slabs_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)} +}; + static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, @@ -827,19 +857,25 @@ static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = }; static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { - {NAME("full_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_full_slabs)}, - {NAME("empty_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_empty_slabs)}, - {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)}, + {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, + {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, + {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, + + {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, {NAME("npurge_passes"), 
CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, - {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + + {NAME("full_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_empty_slabs)}, + {NAME("nonfull_slabs"), CHILD(indexed, + stats_arenas_i_hpa_shard_nonfull_slabs)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -4061,6 +4097,29 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndirty, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.ndirty, size_t); + +/* Nonhuge slabs */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].ndirty, size_t); + +/* Huge slabs */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_huge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].ndirty, size_t); + CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, diff --git a/src/psset.c b/src/psset.c index 55966816..9a833193 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,7 +11,6 @@ psset_init(psset_t *psset) { hpdata_age_heap_new(&psset->pageslabs[i]); } fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); - memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { @@ -30,10 +29,14 @@ psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { - psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); - psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); - psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]); - psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]); + psset_bin_stats_accum(&dst->merged, &src->merged); + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_bin_stats_accum(&dst->slabs[huge], &src->slabs[huge]); + psset_bin_stats_accum(&dst->full_slabs[huge], + &src->full_slabs[huge]); + psset_bin_stats_accum(&dst->empty_slabs[huge], + &src->empty_slabs[huge]); + } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { psset_bin_stats_accum(&dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); @@ -48,48 +51,76 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * bin) when we call psset_update_end. 
*/ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, - hpdata_t *ps, bool insert) { +psset_slab_stats_insert_remove(psset_stats_t *stats, + psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { size_t mul = insert ? (size_t)1 : (size_t)-1; + size_t nactive = hpdata_nactive_get(ps); + size_t ndirty = hpdata_ndirty_get(ps); + + stats->merged.npageslabs += mul * 1; + stats->merged.nactive += mul * nactive; + stats->merged.ndirty += mul * ndirty; + + /* + * Stats above are necessary for purging logic to work, everything + * below is to improve observability, thense is optional, so we don't + * update it, when stats disabled. + */ + if (!config_stats) { + return; + } + size_t huge_idx = (size_t)hpdata_huge_get(ps); - binstats[huge_idx].npageslabs += mul * 1; - binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); - binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); + stats->slabs[huge_idx].npageslabs += mul * 1; + stats->slabs[huge_idx].nactive += mul * nactive; + stats->slabs[huge_idx].ndirty += mul * ndirty; - psset->merged_stats.npageslabs += mul * 1; - psset->merged_stats.nactive += mul * hpdata_nactive_get(ps); - psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps); + binstats[huge_idx].npageslabs += mul * 1; + binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].ndirty += mul * ndirty; if (config_debug) { - psset_bin_stats_t check_stats = {0}; - for (size_t huge = 0; huge <= 1; huge++) { - psset_bin_stats_accum(&check_stats, - &psset->stats.full_slabs[huge]); - psset_bin_stats_accum(&check_stats, - &psset->stats.empty_slabs[huge]); + psset_bin_stats_t check_stats[PSSET_NHUGE] = {{0}}; + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_bin_stats_accum(&check_stats[huge], + &stats->full_slabs[huge]); + psset_bin_stats_accum(&check_stats[huge], + &stats->empty_slabs[huge]); for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { - psset_bin_stats_accum(&check_stats, - 
&psset->stats.nonfull_slabs[pind][huge]); + psset_bin_stats_accum(&check_stats[huge], + &stats->nonfull_slabs[pind][huge]); } } - assert(psset->merged_stats.npageslabs - == check_stats.npageslabs); - assert(psset->merged_stats.nactive == check_stats.nactive); - assert(psset->merged_stats.ndirty == check_stats.ndirty); + + assert(stats->merged.npageslabs + == check_stats[0].npageslabs + check_stats[1].npageslabs); + assert(stats->merged.nactive + == check_stats[0].nactive + check_stats[1].nactive); + assert(stats->merged.ndirty + == check_stats[0].ndirty + check_stats[1].ndirty); + + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + assert(stats->slabs[huge].npageslabs + == check_stats[huge].npageslabs); + assert(stats->slabs[huge].nactive + == check_stats[huge].nactive); + assert(stats->slabs[huge].ndirty + == check_stats[huge].ndirty); + } } } static void -psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats, +psset_slab_stats_insert(psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(psset, binstats, ps, true); + psset_slab_stats_insert_remove(stats, binstats, ps, true); } static void -psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, +psset_slab_stats_remove(psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(psset, binstats, ps, false); + psset_slab_stats_insert_remove(stats, binstats, ps, false); } static pszind_t @@ -122,27 +153,29 @@ psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) { } static void -psset_stats_insert(psset_t* psset, hpdata_t *ps) { +psset_stats_insert(psset_t *psset, hpdata_t *ps) { + psset_stats_t *stats = &psset->stats; if (hpdata_empty(ps)) { - psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps); + psset_slab_stats_insert(stats, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); + psset_slab_stats_insert(stats, 
psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], + psset_slab_stats_insert(stats, psset->stats.nonfull_slabs[pind], ps); } } static void psset_stats_remove(psset_t *psset, hpdata_t *ps) { + psset_stats_t *stats = &psset->stats; if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps); + psset_slab_stats_remove(stats, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); + psset_slab_stats_remove(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], + psset_slab_stats_remove(stats, psset->stats.nonfull_slabs[pind], ps); } } diff --git a/src/stats.c b/src/stats.c index 7fbaa5cc..b28b9942 100644 --- a/src/stats.c +++ b/src/stats.c @@ -841,12 +841,48 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t uptime) { + size_t npageslabs; + size_t nactive; + size_t ndirty; + + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ndirty_nonhuge; + size_t nretained_nonhuge; + + size_t npageslabs_huge; + size_t nactive_huge; + size_t ndirty_huge; + uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; uint64_t nhugify_failures; uint64_t ndehugifies; + CTL_M2_GET("stats.arenas.0.hpa_shard.npageslabs", + i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", + i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", + i, &ndirty, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); + 
nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; + + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", + i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", @@ -860,18 +896,33 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, emitter_table_printf(emitter, "HPA shard stats:\n" + " Pageslabs: %zu (%zu huge, %zu nonhuge)\n" + " Active pages: %zu (%zu huge, %zu nonhuge)\n" + " Dirty pages: %zu (%zu huge, %zu nonhuge)\n" + " Retained pages: %zu\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" "\n", + npageslabs, npageslabs_huge, npageslabs_nonhuge, + nactive, nactive_huge, nactive_nonhuge, + ndirty, ndirty_huge, ndirty_nonhuge, + nretained_nonhuge, npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), nhugify_failures, rate_per_second(nhugify_failures, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, + &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, + &nactive); + emitter_json_kv(emitter, "ndirty", emitter_type_size, + &ndirty); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, @@ -882,6 +933,24 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, &nhugify_failures); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, 
&ndehugifies); + + emitter_json_object_kv_begin(emitter, "slabs"); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); + + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, + &ndirty_huge); + emitter_json_object_end(emitter); /* End "slabs" */ } static void diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8c8fb18c..6784306f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1002,6 +1002,63 @@ TEST_BEGIN(test_stats_arenas) { } TEST_END +TEST_BEGIN(test_stats_arenas_hpa_shard_counters) { + test_skip_if(!config_stats); + +#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#name, \ + (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ +} while (0) + + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, nactive); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, ndirty); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurge_passes); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurges); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nhugifies); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndehugifies); + +#undef TEST_STATS_ARENAS_HPA_SHARD_COUNTERS +} +TEST_END + +TEST_BEGIN(test_stats_arenas_hpa_shard_slabs) { + test_skip_if(!config_stats); + +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) do { \ + t slab##_##name; \ + size_t sz = sizeof(t); \ + 
expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#slab"."#name, \ + (void *)&slab##_##name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ +} while (0) + +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) do { \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, \ + name##_##nonhuge); \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ +} while (0) + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, ndirty); + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, ndirty); + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, ndirty); + +#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS +#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN +} +TEST_END + static void alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result, UNUSED uintptr_t result_raw, UNUSED uintptr_t args_raw[3]) { @@ -1321,6 +1378,8 @@ main(void) { test_arenas_lookup, test_prof_active, test_stats_arenas, + test_stats_arenas_hpa_shard_counters, + test_stats_arenas_hpa_shard_slabs, test_hooks, test_hooks_exhaustion, test_thread_idle, diff --git a/test/unit/psset.c b/test/unit/psset.c index 6ff72012..6bfdbb5f 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -64,6 +64,24 @@ test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { return false; } +static hpdata_t * +test_psset_hugify(psset_t *psset, edata_t *edata) { + hpdata_t *ps = edata_ps_get(edata); + psset_update_begin(psset, ps); + hpdata_hugify(ps); + psset_update_end(psset, ps); + return ps; +} + +static hpdata_t * +test_psset_dehugify(psset_t *psset, edata_t *edata) { + hpdata_t *ps = 
edata_ps_get(edata); + psset_update_begin(psset, ps); + hpdata_dehugify(ps); + psset_update_end(psset, ps); + return ps; +} + static hpdata_t * test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); @@ -339,6 +357,149 @@ TEST_BEGIN(test_multi_pageslab) { } TEST_END +TEST_BEGIN(test_stats_merged) { + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; + + psset_t psset; + psset_init(&psset); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + edata_init_test(&alloc[0]); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(i, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + edata_init_test(&alloc[i]); + bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) { + test_psset_dalloc(&psset, &alloc[i]); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(i, psset.stats.merged.nactive, ""); + expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.merged.ndirty, ""); + } + /* No allocations have left. */ + test_psset_dalloc(&psset, &alloc[0]); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + + /* + * Last test_psset_dalloc call removed empty pageslab from psset, so + * nothing has left there, even no dirty pages. 
+ */ + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(1, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + psset_update_begin(&psset, &pageslab); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + psset_update_end(&psset, &pageslab); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(1, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); +} +TEST_END + +TEST_BEGIN(test_stats_huge) { + test_skip_if(!config_stats); + + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; + + psset_t psset; + psset_init(&psset); + for (int huge = 0; huge < PSSET_NHUGE; ++huge) { + expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); + } + + edata_init_test(&alloc[0]); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + edata_init_test(&alloc[i]); + bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, 
psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + test_psset_hugify(&psset, &alloc[0]); + + /* All stats should been moved from nonhuge to huge. */ + expect_zu_eq(0, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(1, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + test_psset_dehugify(&psset, &alloc[0]); + + /* And back from huge to nonhuge after dehugification. */ + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) { + test_psset_dalloc(&psset, &alloc[i]); + + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + } + test_psset_dalloc(&psset, &alloc[0]); + + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); + } +} +TEST_END + static void stats_expect_empty(psset_bin_stats_t *stats) { assert_zu_eq(0, stats->npageslabs, @@ -379,7 +540,9 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(nactive, psset_nactive(psset), ""); } -TEST_BEGIN(test_stats) { +TEST_BEGIN(test_stats_fullness) { + test_skip_if(!config_stats); + 
bool err; hpdata_t pageslab; @@ -739,7 +902,9 @@ main(void) { test_reuse, test_evict, test_multi_pageslab, - test_stats, + test_stats_merged, + test_stats_huge, + test_stats_fullness, test_oldest_fit, test_insert_remove, test_purge_prefers_nonhuge, From 46690c9ec036cede074476caa05ecd6fe954bd23 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 28 Nov 2024 07:10:33 -0800 Subject: [PATCH 242/395] Fix `test_retained` on boxes with a lot of CPUs We are trying to create `ncpus * 2` threads for this test and place them into `VARIABLE_ARRAY`, but `VARIABLE_ARRAY` can not be more than `VARIABLE_ARRAY_SIZE_MAX` bytes. When there are a lot of threads on the box test always fails. ``` $ nproc 176 $ make -j`nproc` tests_unit && ./test/unit/retained : ../test/unit/retained.c:123: Failed assertion: "sizeof(thd_t) * (nthreads) <= VARIABLE_ARRAY_SIZE_MAX" Aborted (core dumped) ``` There is no need for high concurrency for this test as we are only checking stats there and it's behaviour is quite stable regarding number of allocating threads. Limited number of threads to 16 to save compute resources (on CI for example) and reduce tests running time. Before the change (`nproc` is 80 on this box). ``` $ make -j`nproc` tests_unit && time ./test/unit/retained <...> real 0m0.372s user 0m14.236s sys 0m12.338s ``` After the change (same box). ``` $ make -j`nproc` tests_unit && time ./test/unit/retained <...> real 0m0.018s user 0m0.108s sys 0m0.068s ``` --- test/unit/retained.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/unit/retained.c b/test/unit/retained.c index 340f2d38..40cbb0cd 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -110,8 +110,15 @@ TEST_BEGIN(test_retained) { atomic_store_u(&epoch, 0, ATOMIC_RELAXED); unsigned nthreads = ncpus * 2; - if (LG_SIZEOF_PTR < 3 && nthreads > 16) { - nthreads = 16; /* 32-bit platform could run out of vaddr. 
*/ + if (nthreads > 16) { + /* + * Limit number of threads we are creating for following + * reasons. + * 1. On 32-bit platforms could run out of vaddr. + * 2. On boxes with a lot of CPUs we might have not enough + * memory to fit thd_t into VARIABLE_ARRAY. + */ + nthreads = 16; } VARIABLE_ARRAY(thd_t, threads, nthreads); for (unsigned i = 0; i < nthreads; i++) { From 6786934280392e71a1e14d48b331d4eca58550a7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 9 Dec 2024 14:24:19 -0800 Subject: [PATCH 243/395] Fix ehooks assertion for arena creation --- src/ehooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ehooks.c b/src/ehooks.c index fc2355e6..89e30409 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -53,7 +53,7 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { arena_t *arena = arena_get(tsdn, arena_ind, false); /* NULL arena indicates arena_create. */ - assert(arena != NULL || alignment == HUGEPAGE); + assert(arena != NULL || alignment == BASE_BLOCK_MIN_ALIGN); dss_prec_t dss = (arena == NULL) ? 
dss_prec_disabled : (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, From a17385a882c252a292299ab047d13fc3b2d6fb16 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Dec 2024 15:06:06 -0800 Subject: [PATCH 244/395] Enable large hugepage tests for arm64 on Travis --- .travis.yml | 3 +++ scripts/gen_travis.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/.travis.yml b/.travis.yml index aad7eea7..ceda8989 100644 --- a/.travis.yml +++ b/.travis.yml @@ -295,6 +295,9 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index ae0b9e2e..43457967 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -96,6 +96,15 @@ class Option(object): return (isinstance(obj, Option) and obj.type == self.type and obj.value == self.value) + def __repr__(self): + type_names = { + Option.Type.COMPILER: 'COMPILER', + Option.Type.COMPILER_FLAG: 'COMPILER_FLAG', + Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG', + Option.Type.MALLOC_CONF: 'MALLOC_CONF', + Option.Type.FEATURE: 'FEATURE' + } + return f"Option({type_names[self.type]}, {repr(self.value)})" # The 'default' configuration is gcc, on linux, with no compiler or configure # flags. 
We also test with clang, -m32, --enable-debug, --enable-prof, @@ -125,7 +134,9 @@ configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( '--disable-libdl', '--enable-opt-safety-checks', '--with-lg-page=16', + '--with-lg-page=16 --with-lg-hugepage=29', )] +LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29") malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( @@ -250,6 +261,9 @@ def generate_linux(arch): # Avoid 32 bit build on ARM64 exclude = (CROSS_COMPILE_32BIT,) + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -264,6 +278,9 @@ def generate_macos(arch): [Option.as_configure_flag('--enable-prof')] + [CLANG,]) + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + return generate_jobs(os, arch, exclude, max_unusual_opts) From 587676fee8a77046e67d3ae8eb26e5456b6da481 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Wed, 11 Dec 2024 15:24:26 -0800 Subject: [PATCH 245/395] Disable psset test when hugepage size is too large. --- include/jemalloc/internal/hpa.h | 1 + src/hpa.c | 7 ++++++- test/unit/psset.c | 12 ++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 4c410c40..1f90a15f 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -151,6 +151,7 @@ struct hpa_shard_s { nstime_t last_purge; }; +bool hpa_hugepage_size_exceeds_limit(); /* * Whether or not the HPA can be used given the current configuration. 
This is * is not necessarily a guarantee that it backs its allocations by hugepages, diff --git a/src/hpa.c b/src/hpa.c index 14541413..cb3f978c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -24,6 +24,11 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +bool +hpa_hugepage_size_exceeds_limit() { + return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; +} + bool hpa_supported(void) { #ifdef _WIN32 @@ -52,7 +57,7 @@ hpa_supported(void) { return false; } /* As mentioned in pages.h, do not support If HUGEPAGE is too large. */ - if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + if (hpa_hugepage_size_exceeds_limit()) { return false; } return true; diff --git a/test/unit/psset.c b/test/unit/psset.c index 6bfdbb5f..c400f3b9 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -120,6 +120,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { } TEST_BEGIN(test_empty) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ -137,6 +138,7 @@ TEST_BEGIN(test_empty) { TEST_END TEST_BEGIN(test_fill) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; @@ -169,6 +171,7 @@ TEST_BEGIN(test_fill) { TEST_END TEST_BEGIN(test_reuse) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -261,6 +264,7 @@ TEST_BEGIN(test_reuse) { TEST_END TEST_BEGIN(test_evict) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -295,6 +299,7 @@ TEST_BEGIN(test_evict) { TEST_END TEST_BEGIN(test_multi_pageslab) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -420,6 +425,7 @@ TEST_END TEST_BEGIN(test_stats_huge) { test_skip_if(!config_stats); + test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ 
-542,6 +548,7 @@ stats_expect(psset_t *psset, size_t nactive) { TEST_BEGIN(test_stats_fullness) { test_skip_if(!config_stats); + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; @@ -637,6 +644,7 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, } TEST_BEGIN(test_oldest_fit) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; edata_t alloc[HUGEPAGE_PAGES]; edata_t worse_alloc[HUGEPAGE_PAGES]; @@ -660,6 +668,7 @@ TEST_BEGIN(test_oldest_fit) { TEST_END TEST_BEGIN(test_insert_remove) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; edata_t alloc[HUGEPAGE_PAGES]; @@ -706,6 +715,7 @@ TEST_BEGIN(test_insert_remove) { TEST_END TEST_BEGIN(test_purge_prefers_nonhuge) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); /* * All else being equal, we should prefer purging non-huge pages over * huge ones for non-empty extents. @@ -789,6 +799,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { TEST_END TEST_BEGIN(test_purge_prefers_empty) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; psset_t psset; @@ -825,6 +836,7 @@ TEST_BEGIN(test_purge_prefers_empty) { TEST_END TEST_BEGIN(test_purge_prefers_empty_huge) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; psset_t psset; From d8486b2653dc54f4d836e389960f627ab56cb8b4 Mon Sep 17 00:00:00 2001 From: appujee <124090381+appujee@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:00:32 -0700 Subject: [PATCH 246/395] Remove unreachable() macro as c23 already defines it. Taken from https://android-review.git.corp.google.com/c/platform/external/jemalloc_new/+/3316478 This might need more cleanups to remove the definition of JEMALLOC_INTERNAL_UNREACHABLE. 
--- include/jemalloc/internal/util.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 24f23629..b400f231 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -39,12 +39,6 @@ # define unlikely(x) !!(x) #endif -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif - -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() - /* Set error code. */ UTIL_INLINE void set_errno(int errnum) { From 4b88bddbcac1f994034eb5d7485fd35663c3d325 Mon Sep 17 00:00:00 2001 From: appujee <124090381+appujee@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:14:10 -0800 Subject: [PATCH 247/395] Conditionally remove unreachable for C23+ --- include/jemalloc/internal/util.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b400f231..6646386e 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -39,6 +39,15 @@ # define unlikely(x) !!(x) #endif +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#include +#else +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() +#endif + /* Set error code. */ UTIL_INLINE void set_errno(int errnum) { From 17881ebbfd76529904e826f425f3266834cf3a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20Hor=C3=A1k?= Date: Fri, 8 Nov 2024 15:34:06 +0000 Subject: [PATCH 248/395] Add configure check for gettid() presence The gettid() function is available on Linux in glibc only since version 2.30. There are supported distributions that still use older glibc version. Thus add a configure check if the gettid() function is available and extend the check in src/prof_stack_range.c so it's skipped also when gettid() isn't available. 
Fixes: https://github.com/jemalloc/jemalloc/issues/2740 --- configure.ac | 9 +++++++++ src/prof_stack_range.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a330e33e..e5fb3a6d 100644 --- a/configure.ac +++ b/configure.ac @@ -2706,6 +2706,15 @@ if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ]) fi +JE_COMPILABLE([gettid], [ +#include +], [ + int tid = gettid(); +], [je_cv_gettid]) +if test "x${je_cv_gettid}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_GETTID], [ ], [ ]) +fi + JE_CFLAGS_SAVE() JE_CFLAGS_ADD([-D_GNU_SOURCE]) JE_CFLAGS_ADD([-Werror]) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index c3458044..1f40dcc5 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) +#if defined (__linux__) && defined(JE_HAVE_GETTID) #include #include From 52fa9577ba8fa94f41c8c92f845a74c3fb04db80 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 14 Jan 2025 10:46:39 -0800 Subject: [PATCH 249/395] Fix integer overflow in test/unit/hash.c `final[3]` is `uint8_t`. Integer conversion rank of `uint8_t` is lower than integer conversion rank of `int`, so `uint8_t` got promoted to `int`, which is signed integer type. Shift `final[3]` value left on 24, when leftmost bit is set overflows `int` and it is undefined behaviour. Before this change Undefined Behaviour Sanitizer was unhappy about it with the following message. ``` ../test/unit/hash.c:119:25: runtime error: left shift of 176 by 24 places cannot be represented in type 'int' ``` After this commit problem is gone. 
--- test/unit/hash.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/hash.c b/test/unit/hash.c index 17c66ec6..7276333d 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -115,8 +115,11 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { } default: not_reached(); } - computed = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | - (final[3] << 24); + computed = + ((uint32_t)final[0] << 0) | + ((uint32_t)final[1] << 8) | + ((uint32_t)final[2] << 16) | + ((uint32_t)final[3] << 24); switch (variant) { #ifdef JEMALLOC_BIG_ENDIAN From 20cc983314ecf14ac08ccf0d60ce7e41f88babf6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Jan 2025 15:06:02 -0800 Subject: [PATCH 250/395] Fix the gettid() detection caught by @mrluanma . --- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/prof_stack_range.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5cf77f47..742d599d 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -411,6 +411,9 @@ /* Adaptive mutex support in pthreads. 
*/ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* gettid() support */ +#undef JEMALLOC_HAVE_GETTID + /* GNU specific sched_getcpu support */ #undef JEMALLOC_HAVE_SCHED_GETCPU diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 1f40dcc5..6a99b56f 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) && defined(JE_HAVE_GETTID) +#if defined (__linux__) && defined(JEMALLOC_HAVE_GETTID) #include #include From 607b86603532b59c35cfdf9abd61a0c14966092b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Jan 2025 15:15:56 -0800 Subject: [PATCH 251/395] Check for 0 input when setting max_background_thread through mallctl. Reported by @nc7s. --- src/ctl.c | 3 ++- test/unit/background_thread_enable.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 66844105..b0fc0487 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2181,7 +2181,8 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib, ret = 0; goto label_return; } - if (newval > opt_max_background_threads) { + if (newval > opt_max_background_threads || + newval == 0) { ret = EINVAL; goto label_return; } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 5f42feff..3a2d55ac 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -54,6 +54,9 @@ TEST_BEGIN(test_max_background_threads) { "opt.max_background_threads should match"); expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, "Failed to set max background threads"); + size_t size_zero = 0; + expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, + sz_m), 0, "Should not allow zero background threads"); unsigned id; size_t sz_u = sizeof(unsigned); @@ -80,6 +83,8 @@ TEST_BEGIN(test_max_background_threads) { new_max_thds = 1; 
expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); + expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, + sz_m), 0, "Should not allow zero background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } From ef8e512e2916a7c2dfca289e9113324b87324723 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 24 Jan 2025 07:48:58 -0800 Subject: [PATCH 252/395] Fix `bitmap_ffu` out of range read We tried to load `g` from `bitmap[i]` before checking it is actually a valid load. Tweaked a loop a bit to `break` early, when we are done scanning for bits. Before this commit undefined behaviour sanitizer from GCC 14+ was unhappy at `test/unit/bitmap` test with following error. ``` ../include/jemalloc/internal/bitmap.h:293:5: runtime error: load of address 0x7bb1c2e08008 with insufficient space for an object of type 'const bitmap_t' <...> #0 0x62671a149954 in bitmap_ffu ../include/jemalloc/internal/bitmap.h:293 #1 0x62671a149954 in test_bitmap_xfu_body ../test/unit/bitmap.c:275 #2 0x62671a14b767 in test_bitmap_xfu ../test/unit/bitmap.c:323 #3 0x62671a376ad1 in p_test_impl ../test/src/test.c:149 #4 0x62671a377135 in p_test ../test/src/test.c:200 #5 0x62671a13da06 in main ../test/unit/bitmap.c:336 <...> ``` --- include/jemalloc/internal/bitmap.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index e501da47..8cd5f5a3 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -284,14 +284,17 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); size_t bit; - do { + while (1) { if (g != 0) { bit = ffs_lu(g); return (i << LG_BITMAP_GROUP_NBITS) + bit; } i++; + if (i >= binfo->ngroups) { + break; + } g 
= bitmap[i]; - } while (i < binfo->ngroups); + } return binfo->nbits; #endif } From 257e64b968ec40c285331dfb6e3db8a2b34999d1 Mon Sep 17 00:00:00 2001 From: Shai Duvdevani Date: Wed, 29 Jan 2025 15:25:10 -0800 Subject: [PATCH 253/395] Unlike `prof_sample` which is supported only with profiling mode active, `prof_threshold` is intended to be an always-supported allocation callback with much less overhead. The usage of the threshold allows performance critical callers to change program execution based on the callback: e.g. drop caches when memory becomes high or to predict the program is about to OOM ahead of time using peak memory watermarks. --- Makefile.in | 3 + include/jemalloc/internal/prof_externs.h | 4 + include/jemalloc/internal/prof_hook.h | 5 + include/jemalloc/internal/prof_threshold.h | 11 ++ include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/tsd_internals.h | 2 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 31 ++++++ src/jemalloc.c | 4 + src/prof_threshold.c | 57 ++++++++++ src/thread_event.c | 5 + test/unit/mallctl.c | 1 + test/unit/prof_threshold.c | 103 ++++++++++++++++++ test/unit/prof_threshold_small.c | 2 + test/unit/prof_threshold_small.sh | 1 + 22 files changed, 246 insertions(+) create mode 100644 include/jemalloc/internal/prof_threshold.h create mode 100644 src/prof_threshold.c create mode 100644 test/unit/prof_threshold.c create mode 100644 test/unit/prof_threshold_small.c create mode 100644 test/unit/prof_threshold_small.sh diff --git a/Makefile.in b/Makefile.in index 27eb90d3..1914fc28 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,6 +145,7 @@ 
C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_stack_range.c \ $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ + $(srcroot)src/prof_threshold.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ @@ -266,6 +267,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ + $(srcroot)test/unit/prof_threshold.c \ + $(srcroot)test/unit/prof_threshold_small.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 952ace7d..789e3811 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -11,6 +11,7 @@ extern bool opt_prof_active; extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern size_t opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. 
*/ @@ -67,6 +68,9 @@ prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); +void prof_threshold_hook_set(prof_threshold_hook_t hook); +prof_threshold_hook_t prof_threshold_hook_get(void); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 087dadc6..2f3a81af 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -26,4 +26,9 @@ typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrac /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); +/* + * A callback hook that notifies when an allocation threshold has been crossed. + */ +typedef void (*prof_threshold_hook_t)(uint64_t alloc, uint64_t dealloc, uint64_t peak); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h new file mode 100644 index 00000000..dc9c8f2b --- /dev/null +++ b/include/jemalloc/internal/prof_threshold.h @@ -0,0 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_THRESHOLD_EVENT_H +#define JEMALLOC_INTERNAL_THRESHOLD_EVENT_H + +#include "jemalloc/internal/tsd_types.h" + +/* The activity-triggered hooks. 
*/ +uint64_t prof_threshold_new_event_wait(tsd_t *tsd); +uint64_t prof_threshold_postponed_event_wait(tsd_t *tsd); +void prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed); + +#endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 46c57ed5..ad46ffe7 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -56,6 +56,7 @@ void tsd_te_init(tsd_t *tsd); #define ITERATE_OVER_ALL_EVENTS \ E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ E(prof_sample, (config_prof && opt_prof), true) \ + E(prof_threshold, config_stats, true) \ E(stats_interval, (opt_stats_interval >= 0), true) \ E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ E(peak_alloc, config_stats, true) \ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 439f1d10..0ed33234 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -72,6 +72,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ + O(prof_threshold_event_wait, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ O(stats_interval_last_event, uint64_t, uint64_t) \ O(peak_alloc_event_wait, uint64_t, uint64_t) \ @@ -105,6 +106,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* tcache_gc_dalloc_event_wait */ 0, \ /* prof_sample_event_wait */ 0, \ /* prof_sample_last_event */ 0, \ + /* prof_threshold_event_wait */ 0, \ /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ /* peak_alloc_event_wait */ 0, \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 58bd7b3e..c43b30b1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 6e59c035..a195f6b3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index db06fc6d..cd16005d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 01de0dcb..2d8c4be6 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters 
b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index b0fc0487..1ebcbf8e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -154,6 +154,7 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) +CTL_PROTO(opt_experimental_lg_prof_threshold) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) @@ -357,6 +358,7 @@ CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) +CTL_PROTO(experimental_hooks_prof_threshold) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -539,6 +541,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("experimental_lg_prof_threshold"), CTL(opt_experimental_lg_prof_threshold)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, @@ -965,6 +968,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, + {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; @@ -2317,6 +2321,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, 
opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) @@ -3778,6 +3783,32 @@ label_return: return ret; } + +static int +experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_threshold_hook_t old_hook = + prof_threshold_hook_get(); + READ(old_hook, prof_threshold_hook_t); + } + if (newp != NULL) { + prof_threshold_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_threshold_hook_t); + prof_threshold_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + + /* For integration test purpose only. No plan to move out of experimental. 
*/ static int experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, diff --git a/src/jemalloc.c b/src/jemalloc.c index 67be7681..6d2f6494 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1619,6 +1619,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_experimental_lg_prof_threshold, + "experimental_lg_prof_threshold", 0, (sizeof(uint64_t) << 3) + - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, diff --git a/src/prof_threshold.c b/src/prof_threshold.c new file mode 100644 index 00000000..28a525fc --- /dev/null +++ b/src/prof_threshold.c @@ -0,0 +1,57 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/prof_threshold.h" + +#include "jemalloc/internal/prof_externs.h" + +/* + * Update every 128MB by default. + */ +#define PROF_THRESHOLD_LG_WAIT_DEFAULT 27 + +/* Logically a prof_threshold_hook_t. 
*/ +static atomic_p_t prof_threshold_hook; +size_t opt_experimental_lg_prof_threshold = PROF_THRESHOLD_LG_WAIT_DEFAULT; + +void +prof_threshold_hook_set(prof_threshold_hook_t hook) { + atomic_store_p(&prof_threshold_hook, hook, ATOMIC_RELEASE); +} + +prof_threshold_hook_t +prof_threshold_hook_get(void) { + return (prof_threshold_hook_t)atomic_load_p(&prof_threshold_hook, + ATOMIC_ACQUIRE); +} + +/* Invoke callback for threshold reached */ +static void +prof_threshold_update(tsd_t *tsd) { + prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); + if (prof_threshold_hook == NULL) { + return; + } + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + pre_reentrancy(tsd, NULL); + prof_threshold_hook(alloc, dalloc, peak->cur_max); + post_reentrancy(tsd); +} + +uint64_t +prof_threshold_new_event_wait(tsd_t *tsd) { + return 1 << opt_experimental_lg_prof_threshold; +} + +uint64_t +prof_threshold_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed) { + prof_threshold_update(tsd); +} diff --git a/src/thread_event.c b/src/thread_event.c index 37eb5827..a8276cd7 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -69,6 +69,11 @@ peak_dalloc_fetch_elapsed(tsd_t *tsd) { return TE_INVALID_ELAPSED; } +static uint64_t +prof_threshold_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + /* Per event facilities done. 
*/ static bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6784306f..02fedaa7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -319,6 +319,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_active, prof); TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); + TEST_MALLCTL_OPT(ssize_t, experimental_lg_prof_threshold, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c new file mode 100644 index 00000000..48e9df19 --- /dev/null +++ b/test/unit/prof_threshold.c @@ -0,0 +1,103 @@ +#include "test/jemalloc_test.h" + +/* Test config (set in reset_test_config) */ +#define ALLOC_ITERATIONS_IN_THRESHOLD 10 +uint64_t threshold_bytes = 0; +uint64_t chunk_size = 0; + +/* Test globals for calblack */ +uint64_t hook_calls = 0; +uint64_t last_peak = 0; +uint64_t last_alloc = 0; +uint64_t alloc_baseline = 0; + +void +mock_prof_threshold_hook(uint64_t alloc, uint64_t dealloc, uint64_t peak) { + hook_calls++; + last_peak = peak; + last_alloc = alloc; +} + +/* Need the do_write flag because NULL is a valid to_write value. */ +static void +read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, + prof_threshold_hook_t to_write) { + size_t hook_sz = sizeof(prof_threshold_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_threshold", + (void *)to_read, &hook_sz, do_write ? 
&to_write : NULL, hook_sz), 0, + "Unexpected prof_threshold_hook mallctl failure"); +} + +static void +write_prof_threshold_hook(prof_threshold_hook_t new_hook) { + read_write_prof_threshold_hook(NULL, true, new_hook); +} + +static prof_threshold_hook_t +read_prof_threshold_hook() { + prof_threshold_hook_t hook; + read_write_prof_threshold_hook(&hook, false, NULL); + return hook; +} + +static void reset_test_config() { + hook_calls = 0; + last_peak = 0; + alloc_baseline = last_alloc; /* We run the test multiple times */ + last_alloc = 0; + threshold_bytes = 1 << opt_experimental_lg_prof_threshold; + chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; +} + +static void expect_threshold_calls(int calls) { + expect_zu_eq(hook_calls, calls, "Hook called the right amount of times"); + expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); + expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); +} + +static void allocate_chunks(int chunks) { + for (int i = 0; i < chunks; i++) { + void* p = mallocx(chunk_size, 0); + expect_ptr_not_null(p, "Failed to allocate"); + free(p); + } +} + +TEST_BEGIN(test_prof_threshold_hook) { + /* Test setting and reading the hook (both value and null) */ + write_prof_threshold_hook(mock_prof_threshold_hook); + expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); + + write_prof_threshold_hook(NULL); + expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); + + /* Reset everything before the test */ + reset_test_config(); + write_prof_threshold_hook(mock_prof_threshold_hook); + + int err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, "Peak reset failed"); + + /* Note that since we run this test multiple times and we don't reset + the allocation counter, each time we offset the callback by the + amount we allocate over the threshold. 
*/ + + /* A simple small allocation is not enough to trigger the callback */ + allocate_chunks(1); + expect_zu_eq(hook_calls, 0, "Hook not called yet"); + + /* Enough allocations to trigger the callback */ + allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); + expect_threshold_calls(1); + + /* Enough allocations to trigger the callback again */ + allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); + expect_threshold_calls(2); +} +TEST_END + +int +main(void) { + return test( + test_prof_threshold_hook); +} diff --git a/test/unit/prof_threshold_small.c b/test/unit/prof_threshold_small.c new file mode 100644 index 00000000..67f444b1 --- /dev/null +++ b/test/unit/prof_threshold_small.c @@ -0,0 +1,2 @@ +#include "test/jemalloc_test.h" +#include "prof_threshold.c" diff --git a/test/unit/prof_threshold_small.sh b/test/unit/prof_threshold_small.sh new file mode 100644 index 00000000..62726069 --- /dev/null +++ b/test/unit/prof_threshold_small.sh @@ -0,0 +1 @@ +export MALLOC_CONF="experimental_lg_prof_threshold:22" From 1abeae9ebd7b3c9f3ebb5e49db393149c37f18f9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Jan 2025 21:33:30 -0800 Subject: [PATCH 254/395] Fix test/unit/prof_threshold when !config_stats --- test/unit/prof_threshold.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index 48e9df19..2026c1c6 100644 --- a/test/unit/prof_threshold.c +++ b/test/unit/prof_threshold.c @@ -64,6 +64,8 @@ static void allocate_chunks(int chunks) { } TEST_BEGIN(test_prof_threshold_hook) { + test_skip_if(!config_stats); + /* Test setting and reading the hook (both value and null) */ write_prof_threshold_hook(mock_prof_threshold_hook); expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); From 3bc89cfecab89cdc2cd6ed8566e15b7fa4fdac88 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2025 10:45:11 -0800 Subject: [PATCH 255/395] Avoid implicit conversion in test/unit/prof_threshold --- 
test/unit/prof_threshold.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index 2026c1c6..c6f53983 100644 --- a/test/unit/prof_threshold.c +++ b/test/unit/prof_threshold.c @@ -50,14 +50,14 @@ static void reset_test_config() { } static void expect_threshold_calls(int calls) { - expect_zu_eq(hook_calls, calls, "Hook called the right amount of times"); + expect_u64_eq(hook_calls, calls, "Hook called the right amount of times"); expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); } static void allocate_chunks(int chunks) { for (int i = 0; i < chunks; i++) { - void* p = mallocx(chunk_size, 0); + void* p = mallocx((size_t)chunk_size, 0); expect_ptr_not_null(p, "Failed to allocate"); free(p); } @@ -86,7 +86,7 @@ TEST_BEGIN(test_prof_threshold_hook) { /* A simple small allocation is not enough to trigger the callback */ allocate_chunks(1); - expect_zu_eq(hook_calls, 0, "Hook not called yet"); + expect_u64_eq(hook_calls, 0, "Hook not called yet"); /* Enough allocations to trigger the callback */ allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); From 34c823f1479047990a73d0e9acf396c2e04fb6b1 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 10 Jan 2025 10:51:21 -0800 Subject: [PATCH 256/395] Add autoconf options to enable sanitizers This commit allows to enable sanitizers with autoconf options, instead of modifying `CFLAGS`, `CXXFLAGS` and `LDFLAGS` directly. * `--enable-tsan` option to enable Thread Sanitizer. * `--enable-ubsan` option to enable Undefined Behaviour Sanitizer. End goal is to speedup development by finding problems quickly, early and easier. Eventually, when all current issues will be fixed, we can enable sanitizers in CI. Fortunately, there are not a lot of problems we need to fix. 
Address Sanitizer is a bit controversial, because it replaces memory allocator, so we decided to left it out for a while. Below are couple of examples of how tests look like under different sanitizers at the moment. ``` $ ../configure --enable-tsan --enable-debug <...> asan : 0 tsan : 1 ubsan : 0 $ make -j`nproc` check <...> Thread T13 (tid=332043, running) created by main thread at: #0 pthread_create (libtsan.so.0+0x61748) #1 thd_create ../test/src/thd.c:25 (bin_batching+0x5631ca) #2 stress_run ../test/unit/bin_batching.c:148 (bin_batching+0x40364c) #3 test_races ../test/unit/bin_batching.c:249 (bin_batching+0x403d79) #4 p_test_impl ../test/src/test.c:149 (bin_batching+0x562811) #5 p_test_no_reentrancy ../test/src/test.c:213 (bin_batching+0x562d35) #6 main ../test/unit/bin_batching.c:268 (bin_batching+0x40417e) SUMMARY: ThreadSanitizer: data race ../include/jemalloc/internal/edata.h:498 in edata_nfree_inc ``` ``` $ ../configure --enable-ubsan --enable-debug <...> asan : 0 tsan : 0 ubsan : 1 $ make -j`nproc` check <...> === test/unit/hash === ../test/unit/hash.c:119:16: runtime error: left shift of 176 by 24 places cannot be represented in type 'int' <...> ``` --- configure.ac | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e5fb3a6d..7f59b3f1 100644 --- a/configure.ac +++ b/configure.ac @@ -92,6 +92,32 @@ AC_LANG_POP([C++]) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) +CONFIGURE_LDFLAGS= +SPECIFIED_LDFLAGS="${LDFLAGS}" +dnl JE_LDFLAGS_ADD(ldflag) +dnl +dnl LDFLAGS is the concatenation of CONFIGURE_LDFLAGS and SPECIFIED_LDFLAGS +dnl This macro appends to CONFIGURE_LDFLAGS and regenerates LDFLAGS. 
+AC_DEFUN([JE_LDFLAGS_ADD], +[ +AC_MSG_CHECKING([whether linker supports $1]) +T_CONFIGURE_LDFLAGS="${CONFIGURE_LDFLAGS}" +JE_APPEND_VS(CONFIGURE_LDFLAGS, $1) +JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS) +AC_LINK_IFELSE([AC_LANG_PROGRAM( +[[ +]], [[ + return 0; +]])], + [je_cv_ldflags_added=$1] + AC_MSG_RESULT([yes]), + [je_cv_ldflags_added=] + AC_MSG_RESULT([no]) + [CONFIGURE_LDFLAGS="${T_CONFIGURE_LDFLAGS}"] +) +JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS) +]) + dnl JE_COMPILABLE(label, hcode, mcode, rvar) dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors @@ -2647,6 +2673,40 @@ if test "x$enable_pageid" = "x1" ; then AC_DEFINE([JEMALLOC_PAGEID], [ ], [ ]) fi +AC_ARG_ENABLE([tsan], + [AS_HELP_STRING([--enable-tsan], + [Enable thread sanitizer])], +[if test "x$enable_tsan" = "xno" ; then + enable_tsan="0" +else + enable_tsan="1" +fi +], +[enable_tsan="0"] +) +if test "x$enable_tsan" = "x1" ; then + JE_CFLAGS_ADD([-fsanitize=thread]) + JE_CXXFLAGS_ADD([-fsanitize=thread]) + JE_LDFLAGS_ADD([-fsanitize=thread]) +fi + +AC_ARG_ENABLE([ubsan], + [AS_HELP_STRING([--enable-ubsan], + [Enable undefined behavior sanitizer])], +[if test "x$enable_ubsan" = "xno" ; then + enable_ubsan="0" +else + enable_ubsan="1" +fi +], +[enable_ubsan="0"] +) +if test "x$enable_ubsan" = "x1" ; then + JE_CFLAGS_ADD([-fsanitize=undefined]) + JE_CXXFLAGS_ADD([-fsanitize=undefined]) + JE_LDFLAGS_ADD([-fsanitize=undefined]) +fi + dnl ============================================================================ dnl Enable background threads if possible. 
@@ -2869,7 +2929,8 @@ AC_MSG_RESULT([CXX : ${CXX}]) AC_MSG_RESULT([CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}]) AC_MSG_RESULT([SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}]) AC_MSG_RESULT([EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}]) -AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) +AC_MSG_RESULT([CONFIGURE_LDFLAGS : ${CONFIGURE_LDFLAGS}]) +AC_MSG_RESULT([SPECIFIED_LDFLAGS : ${SPECIFIED_LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([DSO_LDFLAGS : ${DSO_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) @@ -2916,4 +2977,6 @@ AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) +AC_MSG_RESULT([tsan : ${enable_tsan}]) +AC_MSG_RESULT([ubsan : ${enable_ubsan}]) AC_MSG_RESULT([===============================================================================]) From c17bf8b368dd400614a42942c2c31a50bce5c680 Mon Sep 17 00:00:00 2001 From: roblabla Date: Tue, 30 Jul 2024 14:56:42 +0200 Subject: [PATCH 257/395] Disable config from file or envvar with build flag This adds a new autoconf flag, --disable-user-config, which disables reading the configuration from /etc/malloc.conf or the MALLOC_CONF environment variable. This can be useful when integrating jemalloc in a binary that internally handles all aspects of the configuration and shouldn't be impacted by ambient change in the environment. 
--- configure.ac | 18 +++++++++++++ include/jemalloc/jemalloc_defs.h.in | 6 +++++ src/jemalloc.c | 40 ++++++++++++++++++----------- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 7f59b3f1..b01ff56b 100644 --- a/configure.ac +++ b/configure.ac @@ -1401,6 +1401,23 @@ if test "x$enable_stats" = "x1" ; then fi AC_SUBST([enable_stats]) +dnl Disable reading configuration from file and environment variable +AC_ARG_ENABLE([user_config], + [AS_HELP_STRING([--disable-user-config], + [Do not read malloc config from /etc/malloc.conf or MALLOC_CONF])], +[if test "x$enable_user_config" = "xno" ; then + enable_user_config="0" +else + enable_user_config="1" +fi +], +[enable_user_config="1"] +) +if test "x$enable_user_config" = "x1" ; then + AC_DEFINE([JEMALLOC_CONFIG_ENV], [ ], [ ]) + AC_DEFINE([JEMALLOC_CONFIG_FILE], [ ], [ ]) +fi + dnl Do not enable smallocx by default. AC_ARG_ENABLE([experimental_smallocx], [AS_HELP_STRING([--enable-experimental-smallocx], [Enable experimental smallocx API])], @@ -2962,6 +2979,7 @@ AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) +AC_MSG_RESULT([user_config : ${enable_user_config}]) AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index ef04e756..96c75011 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -46,6 +46,12 @@ */ #undef JEMALLOC_USE_CXX_THROW +/* + * If undefined, disables reading configuration from environment variable or file + */ +#undef JEMALLOC_CONFIG_ENV +#undef JEMALLOC_CONFIG_FILE + #ifdef _MSC_VER # ifdef _WIN64 # define LG_SIZEOF_PTR_WIN 3 diff --git a/src/jemalloc.c b/src/jemalloc.c index 
6d2f6494..8ae72efb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -985,44 +985,53 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { } break; case 2: { +#ifndef JEMALLOC_CONFIG_FILE + ret = NULL; + break; +#else ssize_t linklen = 0; -#ifndef _WIN32 +# ifndef _WIN32 int saved_errno = errno; const char *linkname = -# ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else +# else "/etc/malloc.conf" -# endif +# endif ; /* * Try to use the contents of the "/etc/malloc.conf" symbolic * link's name. */ -#ifndef JEMALLOC_READLINKAT +# ifndef JEMALLOC_READLINKAT linklen = readlink(linkname, readlink_buf, PATH_MAX); -#else +# else linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); -#endif +# endif if (linklen == -1) { /* No configuration specified. */ linklen = 0; /* Restore errno. */ set_errno(saved_errno); } -#endif +# endif readlink_buf[linklen] = '\0'; ret = readlink_buf; break; - } case 3: { - const char *envname = -#ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -#else - "MALLOC_CONF" #endif - ; + } case 3: { +#ifndef JEMALLOC_CONFIG_ENV + ret = NULL; + break; +#else + const char *envname = +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; if ((ret = jemalloc_getenv(envname)) != NULL) { opt_malloc_conf_env_var = ret; @@ -1031,6 +1040,7 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { ret = NULL; } break; +#endif } case 4: { ret = je_malloc_conf_2_conf_harder; break; From 421b17a622a5037b82aa658dc0cc8264ddd6e711 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Wed, 5 Feb 2025 11:00:15 -0800 Subject: [PATCH 258/395] Remove age_counter from hpa_central Before this commit we had two age counters: one global in HPA central and one local in each HPA shard. We used HPA shard counter, when we are reused empty pageslab and HPA central counter anywhere else. 
They suppose to be comparable, because we use them for allocation placement decisions, but in reality they are not, there is no ordering guarantees between them. At the moment, there is no way for pageslab to migrate between HPA shards, so we don't actually need HPA central age counter. --- include/jemalloc/internal/hpa.h | 2 -- src/hpa.c | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1f90a15f..d788d051 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -31,8 +31,6 @@ struct hpa_central_s { size_t eden_len; /* Source for metadata. */ base_t *base; - /* Number of grow operations done on this hpa_central_t. */ - uint64_t age_counter; /* The HPA hooks. */ hpa_hooks_t hooks; diff --git a/src/hpa.c b/src/hpa.c index cb3f978c..932cf201 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -82,7 +82,6 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) central->base = base; central->eden = NULL; central->eden_len = 0; - central->age_counter = 0; central->hooks = *hooks; return false; } @@ -95,7 +94,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - bool *oom) { + uint64_t age, bool *oom) { /* Don't yet support big allocations; these should get filtered out. 
*/ assert(size <= HUGEPAGE); /* @@ -118,7 +117,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, central->age_counter++); + hpdata_init(ps, central->eden, age); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -168,7 +167,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, central->age_counter++); + hpdata_init(ps, central->eden, age); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; @@ -738,7 +737,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, + shard->age_counter++, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; From 499f3068593ec61dae961e2c8ea3e0cf1482d616 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Wed, 5 Feb 2025 04:32:31 -0800 Subject: [PATCH 259/395] Fix arena 0 `deferral_allowed` flag init Arena 0 have a dedicated initialization path, which differs from initialization path of other arenas. The main difference for the purpose of this change is that we initialize arena 0 before we initialize background threads. HPA shard options have `deferral_allowed` flag which should be equal to `background_thread_enabled()` return value, but it wasn't the case before this change, because for arena 0 `background_thread_enabled()` was initialized correctly after arena 0 initialization phase already ended. 
Below is initialization sequence for arena 0 after this commit to illustrate everything still should be initialized correctly. * `hpa_central_init` initializes HPA Central, before we initialize every HPA shard (including arena's 0). * `background_thread_boot1` initializes `background_thread_enabled()` return value. * `pa_shard_enable_hpa` initializes arena 0 HPA shard. ``` malloc_init_hard ------------- / / \ / / \ / / \ malloc_init_hard_a0_locked background_thread_boot1 pa_shard_enable_hpa / / \ / / \ / / \ arena_boot background_thread_enabled_seta hpa_shard_init | | pa_central_init | | hpa_central_init ``` --- src/arena.c | 4 ++-- src/jemalloc.c | 21 ++++++++++++++------- test/unit/hpa_background_thread.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index 884d1bf9..ab6006d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1789,8 +1789,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { * We turn on the HPA if set to. There are two exceptions: * - Custom extent hooks (we should only return memory allocated from * them in that case). - * - Arena 0 initialization. In this case, we're mid-bootstrapping, and - * so arena_hpa_global is not yet initialized. + * - Arena 0 initialization. In this case, we're mid-bootstrapping, + * and so background_thread_enabled is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; diff --git a/src/jemalloc.c b/src/jemalloc.c index 8ae72efb..55e85710 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1964,13 +1964,6 @@ malloc_init_hard_a0_locked(void) { } else { opt_hpa = false; } - } else if (opt_hpa) { - hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; - hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { - return true; - } } malloc_init_state = malloc_init_a0_initialized; @@ -2225,6 +2218,20 @@ malloc_init_hard(void) { || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } + if (opt_hpa) { + /* + * We didn't initialize arena 0 hpa_shard in arena_new, because + * background_thread_enabled wasn't initialized yet, but we + * need it to set correct value for deferral_allowed. + */ + arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + } if (config_prof && prof_boot2(tsd, b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index e4abb63b..93f046b5 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -1,6 +1,31 @@ #include "test/jemalloc_test.h" #include "test/sleep.h" +TEST_BEGIN(test_hpa_background_thread_a0_initialized) { + /* + * Arena 0 has dedicated initialization path. We'd like to make sure + * deferral_allowed value initialized correctly from the start of the + * application. 
+ */ + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + test_skip_if(san_guard_enabled()); + + bool enabled = false; + size_t sz = sizeof(enabled); + int err = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); + expect_true(enabled, "Background thread should be enabled"); + + arena_t *a0 = arena_get(TSDN_NULL, 0, false); + expect_ptr_ne(a0, NULL, ""); + bool deferral_allowed = a0->pa_shard.hpa_shard.opts.deferral_allowed; + expect_true(deferral_allowed, + "Should have deferral_allowed option enabled for arena #0"); +} +TEST_END + static void sleep_for_background_thread_interval(void) { /* @@ -207,6 +232,12 @@ main(void) { opt_background_thread = true; } return test_no_reentrancy( + /* + * Unfortunately, order of tests is important here. We need to + * make sure arena #0 initialized correctly, before we start + * turning background thread on and off in other tests. + */ + test_hpa_background_thread_a0_initialized, test_hpa_background_thread_purges, test_hpa_background_thread_enable_disable); } From f55e0c3f5c7c5ea1ee40c7c0c6dff4f19aab9c32 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 3 Mar 2025 13:52:06 -0800 Subject: [PATCH 260/395] Remove unsupported Cirrus CI config --- .cirrus.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 13714014..585aa42f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -31,9 +31,6 @@ task: - name: 15-CURRENT freebsd_instance: image_family: freebsd-15-0-snap - - name: 14-STABLE - freebsd_instance: - image_family: freebsd-14-0-snap install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From ac279d7e717e6b5f836657fbc525d0975f80a7d0 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 3 Mar 2025 10:17:03 -0800 Subject: [PATCH 261/395] Fix profiling sample metadata lookup 
during xallocx --- src/jemalloc.c | 10 +++++++++- test/unit/prof_small.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 55e85710..31d4cb27 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3708,7 +3708,15 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_get(tsd, ptr, alloc_ctx, &prof_info); prof_alloc_rollback(tsd, tctx); } else { - prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + /* + * Need to retrieve the new alloc_ctx since the modification + * to edata has already been done. + */ + emap_alloc_ctx_t new_alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &new_alloc_ctx); + prof_info_get_and_reset_recent(tsd, ptr, &new_alloc_ctx, + &prof_info); assert(usize <= usize_max); sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c index 4a982b70..e3462c1f 100644 --- a/test/unit/prof_small.c +++ b/test/unit/prof_small.c @@ -31,13 +31,39 @@ TEST_BEGIN(test_profile_small_allocations) { } TEST_END +TEST_BEGIN(test_profile_small_allocations_sdallocx) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + /* + * While free calls into ifree, sdallocx calls into isfree, + * This test covers the isfree path to make sure promoted small + * allocs are handled properly. 
+ */ + sdallocx(ptr, size, 0); + } +} +TEST_END + TEST_BEGIN(test_profile_small_reallocations_growing) { test_skip_if(!config_prof); void *ptr = NULL; - for (szind_t index = 0; index < SC_NBINS; index++) { + for (szind_t index = 0; index <= SC_NBINS; index++) { size_t size = sz_index2size(index); ptr = realloc(ptr, size); + /* + * When index reaches SC_NBINS, it is no longer a small alloc, + * we still want to test the realloc from a small alloc to a + * large one, but we should not assert_small_allocation_sampled + * on it. + */ + if (index == SC_NBINS) { + break; + } assert_small_allocation_sampled(ptr, size); } } @@ -72,6 +98,7 @@ TEST_END int main(void) { return test(test_profile_small_allocations, + test_profile_small_allocations_sdallocx, test_profile_small_reallocations_growing, test_profile_small_reallocations_shrinking, test_profile_small_reallocations_same_size_class); From c067a55c790bebd69fd6d87935f8c353524ef814 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 26 Mar 2024 14:35:29 -0700 Subject: [PATCH 262/395] Introducing a new usize calculation policy Converting size to usize is what jemalloc has been done by ceiling size to the closest size class. However, this causes lots of memory wastes with HPA enabled. This commit changes how usize is calculated so that the gap between two contiguous usize is no larger than a page. Specifically, this commit includes the following changes: 1. Adding a build-time config option (--enable-limit-usize-gap) and a runtime one (limit_usize_gap) to guard the changes. When build-time config is enabled, some minor CPU overhead is expected because usize will be stored and accessed apart from index. When runtime option is also enabled (it can only be enabled with the build-time config enabled). a new usize calculation approach wil be employed. This new calculation will ceil size to the closest multiple of PAGE for all sizes larger than USIZE_GROW_SLOW_THRESHOLD instead of using the size classes. 
Note when the build-time config is enabled, the runtime option is default on. 2. Prepare tcache for size to grow by PAGE over GROUP*PAGE. To prepare for the upcoming changes where size class grows by PAGE when larger than NGROUP * PAGE, disable the tcache when it is larger than 2 * NGROUP * PAGE. The threshold for tcache is set higher to prevent perf regression as much as possible while usizes between NGROUP * PAGE and 2 * NGROUP * PAGE happen to grow by PAGE. 3. Prepare pac and hpa psset for size to grow by PAGE over GROUP*PAGE For PAC, to avoid having too many bins, arena bins still have the same layout. This means some extra search is needed for a page-level request that is not aligned with the orginal size class: it should also search the heap before the current index since the previous heap might also be able to have some allocations satisfying it. The same changes apply to HPA's psset. This search relies on the enumeration of the heap because not all allocs in the previous heap are guaranteed to satisfy the request. To balance the memory and CPU overhead, we currently enumerate at most a fixed number of nodes before concluding none can satisfy the request during an enumeration. 4. Add bytes counter to arena large stats. To prepare for the upcoming usize changes, stats collected by multiplying alive allocations and the bin size is no longer accurate. Thus, add separate counters to record the bytes malloced and dalloced. 5. Change structs use when freeing to avoid using index2size for large sizes. - Change the definition of emap_alloc_ctx_t - Change the read of both from edata_t. - Change the assignment and usage of emap_alloc_ctx_t. - Change other callsites of index2size. Note for the changes in the data structure, i.e., emap_alloc_ctx_t, will be used when the build-time config (--enable-limit-usize-gap) is enabled but they will store the same value as index2size(szind) if the runtime option (opt_limit_usize_gap) is not enabled. 6. 
Adapt hpa to the usize changes. Change the settings in sec to limit is usage for sizes larger than USIZE_GROW_SLOW_THRESHOLD and modify corresponding tests. 7. Modify usize calculation and corresponding tests. Change the sz_s2u_compute. Note sz_index2size is not always safe now while sz_size2index still works as expected. --- configure.ac | 19 +++ include/jemalloc/internal/arena_inlines_b.h | 52 +++++--- include/jemalloc/internal/arena_stats.h | 10 +- include/jemalloc/internal/edata.h | 61 ++++++++- include/jemalloc/internal/emap.h | 70 +++++++++- include/jemalloc/internal/hpdata.h | 8 +- .../internal/jemalloc_internal_defs.h.in | 6 + .../internal/jemalloc_internal_externs.h | 1 + .../internal/jemalloc_internal_inlines_c.h | 5 +- .../jemalloc/internal/jemalloc_preamble.h.in | 8 ++ include/jemalloc/internal/ph.h | 120 +++++++++++++++++- include/jemalloc/internal/sc.h | 18 +++ include/jemalloc/internal/sz.h | 47 ++++++- include/jemalloc/internal/tcache_types.h | 6 +- src/arena.c | 28 +++- src/ctl.c | 8 +- src/eset.c | 119 ++++++++++++++++- src/hpa.c | 2 +- src/jemalloc.c | 58 +++++++-- src/prof_data.c | 8 +- src/psset.c | 34 ++++- src/sec.c | 7 + src/tcache.c | 3 +- test/integration/rallocx.c | 2 +- test/test.sh.in | 1 + test/unit/arena_reset.c | 3 +- test/unit/hpa.c | 2 +- test/unit/mallctl.c | 1 + test/unit/ph.c | 19 ++- test/unit/sec.c | 3 +- test/unit/size_classes.c | 38 +++++- test/unit/size_classes.sh | 5 + test/unit/stats.c | 15 ++- 33 files changed, 713 insertions(+), 74 deletions(-) create mode 100644 test/unit/size_classes.sh diff --git a/configure.ac b/configure.ac index b01ff56b..a55a5a08 100644 --- a/configure.ac +++ b/configure.ac @@ -2732,6 +2732,24 @@ if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi +dnl ============================================================================ +dnl Limit the gap between two contiguous usizes to be at most PAGE. 
+AC_ARG_ENABLE([limit_usize_gap], + [AS_HELP_STRING([--enable-limit-usize-gap], + [Limit the gap between two contiguous usizes])], +[if test "x$limit_usize_gap" = "xno" ; then + limit_usize_gap="0" +else + limit_usize_gap="1" +fi +], +[limit_usize_gap="0"] +) +if test "x$limit_usize_gap" = "x1" ; then + AC_DEFINE([LIMIT_USIZE_GAP], [ ]) +fi +AC_SUBST([limit_usize_gap]) + dnl ============================================================================ dnl Check for glibc malloc hooks @@ -2997,4 +3015,5 @@ AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([tsan : ${enable_tsan}]) AC_MSG_RESULT([ubsan : ${enable_ubsan}]) +AC_MSG_RESULT([limit-usize-gap : ${limit_usize_gap}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ea246cc5..108493f2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -51,7 +51,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { if (!config_opt_safety_checks) { return false; } @@ -68,7 +68,6 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { "possibly caused by double free bugs.", ptr); return true; } - size_t input_size = sz_index2size(szind); if (unlikely(input_size != edata_usize_get(edata))) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); @@ -101,9 +100,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); + size_t usize = (alloc_ctx == NULL)? 
edata_usize_get(edata): + emap_alloc_ctx_usize_get(alloc_ctx); if (reset_recent && - large_dalloc_safety_checks(edata, ptr, - edata_szind_get(edata))) { + large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -225,7 +225,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - return sz_index2size(alloc_ctx.szind); + return emap_alloc_ctx_usize_get(&alloc_ctx); } JEMALLOC_ALWAYS_INLINE size_t @@ -256,17 +256,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(full_alloc_ctx.szind); + return edata_usize_get(full_alloc_ctx.edata); } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, + size_t usize) { + /* + * szind is still needed in this function mainly becuase + * szind < SC_NBINS determines not only if this is a small alloc, + * but also if szind is valid (an inactive extent would have + * szind == SC_NSIZES). + */ if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -287,19 +294,22 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - bool slow_path) { + size_t usize, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { @@ -313,7 +323,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -396,6 +406,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -407,7 +419,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); } } @@ -422,8 +434,9 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + szind_t szind = sz_size2index(size); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + size); } if ((config_prof && opt_prof) || config_debug) { @@ -446,7 +459,8 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } @@ -469,6 +483,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { alloc_ctx = *caller_alloc_ctx; } @@ -486,6 +501,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, + sz_s2u(size)); + assert(!config_limit_usize_gap || + emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -497,7 +517,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + sz_s2u(size), slow_path); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3d512630..7f075114 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -14,12 +14,18 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS typedef struct arena_stats_large_s arena_stats_large_t; struct arena_stats_large_s { /* - * Total number of allocation/deallocation requests served directly by - * the arena. + * Total number of large allocation/deallocation requests served directly + * by the arena. */ locked_u64_t nmalloc; locked_u64_t ndalloc; + /* + * Total large active bytes (allocated - deallocated) served directly + * by the arena. + */ + locked_u64_t active_bytes; + /* * Number of allocation requests that correspond to this size class. 
* This includes requests served by tcache, though tcache only diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2381ccbc..b087ea31 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -21,6 +21,14 @@ */ #define EDATA_ALIGNMENT 128 +/* + * Defines how many nodes visited when enumerating the heap to search for + * qualifed extents. More nodes visited may result in better choices at + * the cost of longer search time. This size should not exceed 2^16 - 1 + * because we use uint16_t for accessing the queue needed for enumeration. + */ +#define ESET_ENUMERATE_MAX_NUM 32 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -89,8 +97,8 @@ struct edata_cmp_summary_s { /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; -ph_structs(edata_avail, edata_t); -ph_structs(edata_heap, edata_t); +ph_structs(edata_avail, edata_t, ESET_ENUMERATE_MAX_NUM); +ph_structs(edata_heap, edata_t, ESET_ENUMERATE_MAX_NUM); struct edata_s { /* * Bitfield containing several fields: @@ -281,7 +289,54 @@ edata_szind_get(const edata_t *edata) { static inline size_t edata_usize_get(const edata_t *edata) { - return sz_index2size(edata_szind_get(edata)); + assert(edata != NULL); + /* + * When sz_limit_usize_gap_enabled() is true, two cases: + * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, + * usize_from_size is accurate; + * 2. otherwise, usize_from_ind is accurate. + * + * When sz_limit_usize_gap_enabled() is not true, the two should be the + * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. + * + * Note sampled small allocs will be promoted. Their extent size is + * recorded in edata_size_get(edata), while their szind reflects the + * true usize. Thus, usize retrieved here is still accurate for + * sampled small allocs. 
+ */ + szind_t szind = edata_szind_get(edata); +#ifdef JEMALLOC_JET + /* + * Double free is invalid and results in undefined behavior. However, + * for double free tests to end gracefully, return an invalid usize + * when szind shows the edata is not active, i.e., szind == SC_NSIZES. + */ + if (unlikely(szind == SC_NSIZES)) { + return SC_LARGE_MAXCLASS + 1; + } +#endif + + if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + size_t usize_from_ind = sz_index2size(szind); + if (!sz_limit_usize_gap_enabled() && + usize_from_ind >= SC_LARGE_MINCLASS) { + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + assert(usize_from_ind == usize_from_size); + } + return usize_from_ind; + } + + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + /* + * no matter limit-usize-gap enabled or not, usize retrieved from size + * is not accurate when smaller than SC_LARGE_MINCLASS. + */ + assert(usize_from_size >= SC_LARGE_MINCLASS); + return usize_from_size; } static inline unsigned diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7ac0ae95..5885daa6 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -20,8 +20,9 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. 
*/ -typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; -struct emap_alloc_ctx_t { +typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; +struct emap_alloc_ctx_s { + size_t usize; szind_t szind; bool slab; }; @@ -230,16 +231,66 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } +JEMALLOC_ALWAYS_INLINE void +emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, + size_t usize) { + alloc_ctx->szind = szind; + alloc_ctx->slab = slab; + /* + * When config_limit_usize_gap disabled, alloc_ctx->usize + * should not be accessed. + */ + if (config_limit_usize_gap) { + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); + } else if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { + assert(alloc_ctx->szind < SC_NSIZES); + if (!config_limit_usize_gap || alloc_ctx->slab) { + assert(!config_limit_usize_gap || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + return sz_index2size(alloc_ctx->szind); + } + assert(sz_limit_usize_gap_enabled() || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); + return alloc_ctx->usize; +} + /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - alloc_ctx->szind = metadata.szind; - alloc_ctx->slab = metadata.slab; + if (config_limit_usize_gap) { + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + /* + * If the alloc is invalid, do not calculate usize since edata + * could be corrupted. 
+ */ + if (contents.metadata.szind == SC_NSIZES || + contents.edata == NULL) { + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, 0); + return; + } + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, edata_usize_get(contents.edata)); + } else { + rtree_metadata_t metadata = rtree_metadata_read(tsdn, + &emap->rtree, rtree_ctx, (uintptr_t)ptr); + /* alloc_ctx->usize will not be read/write in this case. */ + emap_alloc_ctx_init(alloc_ctx, metadata.szind, metadata.slab, + SC_LARGE_MAXCLASS + 1); + } } /* The pointer must be mapped. */ @@ -293,8 +344,15 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, if (err) { return true; } + /* + * Small allocs using the fastpath can always use index to get the + * usize. Therefore, do not set alloc_ctx->usize here. + */ alloc_ctx->szind = metadata.szind; alloc_ctx->slab = metadata.slab; + if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } return false; } diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7ba92112..a8a845ec 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -20,8 +20,14 @@ * an observable property of any given region of address space). It's just * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ + +/* + * The max enumeration num should not exceed 2^16 - 1, see comments in edata.h + * for ESET_ENUMERATE_MAX_NUM for more details. 
+ */ +#define PSSET_ENUMERATE_MAX_NUM 32 typedef struct hpdata_s hpdata_t; -ph_structs(hpdata_age_heap, hpdata_t); +ph_structs(hpdata_age_heap, hpdata_t, PSSET_ENUMERATE_MAX_NUM); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 742d599d..e76eaaf4 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -475,6 +475,12 @@ /* If defined, use __int128 for optimization. */ #undef JEMALLOC_HAVE_INT128 +/* + * If defined, the gap between any two contiguous usizes should not exceed + * PAGE. + */ +#undef LIMIT_USIZE_GAP + #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 2c6b58f7..8c6df450 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,6 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; +extern bool opt_limit_usize_gap; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 854aec1e..c7ef9161 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size(dbg_ctx.szind), - /* input_size */ 
sz_index2size(alloc_ctx->szind)); + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ emap_alloc_ctx_usize_get( + alloc_ctx)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index a59c3489..ef637a2d 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -276,4 +276,12 @@ static const bool have_memcntl = #endif ; +static const bool config_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index ef9634be..05376004 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -75,6 +75,16 @@ struct ph_s { size_t auxcount; }; +typedef struct ph_enumerate_vars_s ph_enumerate_vars_t; +struct ph_enumerate_vars_s { + uint16_t front; + uint16_t rear; + uint16_t queue_size; + uint16_t visited_num; + uint16_t max_visit_num; + uint16_t max_queue_size; +}; + JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { return (phn_link_t *)(((char *)phn) + offset); @@ -414,14 +424,98 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } -#define ph_structs(a_prefix, a_type) \ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, + uint16_t max_queue_size) { + vars->queue_size = 0; + vars->visited_num = 0; + vars->front = 0; + vars->rear = 0; + vars->max_visit_num = max_visit_num; + vars->max_queue_size = max_queue_size; + assert(vars->max_visit_num > 0); + /* + * max_queue_size must be able to support max_visit_num, which means + * the queue will not overflow before reaching max_visit_num. 
+ */ + assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); +} + +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_queue_push(void *phn, void **bfs_queue, + ph_enumerate_vars_t *vars) { + assert(vars->queue_size < vars->max_queue_size); + bfs_queue[vars->rear] = phn; + vars->rear = (vars->rear + 1) % vars->max_queue_size; + (vars->queue_size) ++; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { + assert(vars->queue_size > 0); + assert(vars->queue_size <= vars->max_queue_size); + void *ret = bfs_queue[vars->front]; + vars->front = (vars->front + 1) % vars->max_queue_size; + (vars->queue_size) --; + return ret; +} + + +/* + * The two functions below offer a solution to enumerate the pairing heap. + * When enumerating, always call ph_enumerate_prepare first to prepare the queue + * needed for BFS. Next, call ph_enumerate_next to get the next element in + * the enumeration. When enumeration ends, ph_enumerate_next returns NULL and + * should not be called again. Enumeration ends when all elements in the heap + * have been enumerated or the number of visited elements exceeds + * max_visit_num. 
+ */ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, + uint16_t max_visit_num, uint16_t max_queue_size) { + ph_enumerate_vars_init(vars, max_visit_num, max_queue_size); + ph_enumerate_queue_push(ph->root, bfs_queue, vars); +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, + ph_enumerate_vars_t *vars) { + if (vars->queue_size == 0) { + return NULL; + } + + (vars->visited_num) ++; + if (vars->visited_num > vars->max_visit_num) { + return NULL; + } + + void *ret = ph_enumerate_queue_pop(bfs_queue, vars); + assert(ret != NULL); + void *left = phn_lchild_get(ret, offset); + void *right = phn_next_get(ret, offset); + if (left) { + ph_enumerate_queue_push(left, bfs_queue, vars); + } + if (right) { + ph_enumerate_queue_push(right, bfs_queue, vars); + } + return ret; +} + +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ typedef struct { \ phn_link_t link; \ } a_prefix##_link_t; \ \ typedef struct { \ ph_t ph; \ -} a_prefix##_t; +} a_prefix##_t; \ + \ +typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ +} a_prefix##_enumerate_helper_t; + /* * The ph_proto() macro generates function prototypes that correspond to the @@ -436,7 +530,12 @@ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ +a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -491,6 +590,21 @@ a_prefix##_remove_any(a_prefix##_t *ph) { \ a_prefix##_remove(ph, ret); \ } \ return ret; \ +} \ + \ +a_attr void \ +a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ + max_visit_num, max_queue_size); \ +} \ + \ +a_attr a_type * \ +a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 770835cc..098e47b7 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -286,6 +286,24 @@ # endif #endif +/* + * When config_limit_usize_gap is enabled, the gaps between two contiguous + * size classes should not exceed PAGE. This means there should be no concept + * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). + * However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * Since tcache relies on size classes to work and it greatly increases the + * perf of allocs & deallocs, we extend the existence of size class to + * 2 * SC_NGROUP * PAGE ONLY for the tcache module. This means for all other + * modules, there is no size class for sizes >= SC_LARGE_MINCLASS. Yet for + * tcache, the threshold is moved up to 2 * SC_NGROUP * PAGE, which is + * USIZE_GROW_SLOW_THRESHOLD defined below. With the default SC_NGROUP being + * 2, and PAGE being 4KB, the threshold for tcache (USIZE_GROW_SLOW_THRESHOLD) + * is 32KB. 
+ */ +#define LG_USIZE_GROW_SLOW_THRESHOLD (SC_LG_NGROUP + LG_PAGE + 1) +#define USIZE_GROW_SLOW_THRESHOLD (1U << LG_USIZE_GROW_SLOW_THRESHOLD) + #define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index a2d2debc..6c0a1f0c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -54,6 +54,15 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); +JEMALLOC_ALWAYS_INLINE bool +sz_limit_usize_gap_enabled() { +#ifdef LIMIT_USIZE_GAP + return opt_limit_usize_gap; +#else + return false; +#endif +} + JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { assert(psz > 0); @@ -257,11 +266,34 @@ sz_index2size_lookup(szind_t index) { } JEMALLOC_ALWAYS_INLINE size_t -sz_index2size(szind_t index) { +sz_index2size_unsafe(szind_t index) { assert(index < SC_NSIZES); return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(!sz_limit_usize_gap_enabled() || + index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); + size_t size = sz_index2size_unsafe(index); + /* + * With limit_usize_gap enabled, the usize above + * SC_LARGE_MINCLASS should grow by PAGE. However, for sizes + * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the + * usize would not change because the size class gap in this + * range is just the same as PAGE. Although we use + * SC_LARGE_MINCLASS as the threshold in most places, we + * allow tcache and sec to cache up to + * USIZE_GROW_SLOW_THRESHOLD to minimize the side effect of + * not having size classes for larger sizes. Thus, we assert + * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here + * instead of SC_LARGE_MINCLASS. 
+ */ + assert(!sz_limit_usize_gap_enabled() || + size <= USIZE_GROW_SLOW_THRESHOLD); + return size; +} + JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { if (util_compile_time_const(size)) { @@ -296,7 +328,7 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - { + if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - SC_LG_NGROUP - 1; @@ -304,11 +336,22 @@ sz_s2u_compute(size_t size) { size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; return usize; + } else { + /* + * With sz_limit_usize_gap_enabled() == true, usize of a large + * allocation is calculated by ceiling size to the smallest + * multiple of PAGE to minimize the memory overhead, especially + * when using hugepages. + */ + size_t usize = PAGE_CEILING(size); + assert(usize - size < PAGE); + return usize; } } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { + assert(!config_limit_usize_gap || size < SC_LARGE_MINCLASS); size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index eebad79f..f13ff748 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,7 +19,11 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#ifdef LIMIT_USIZE_GAP + #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#else + #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#endif #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) 
diff --git a/src/arena.c b/src/arena.c index ab6006d7..54ecc403 100644 --- a/src/arena.c +++ b/src/arena.c @@ -145,8 +145,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - astats->allocated_large += - curlextents * sz_index2size(SC_NBINS + i); + + if (config_limit_usize_gap) { + uint64_t active_bytes = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].active_bytes); + locked_inc_u64_unsynchronized( + &lstats[i].active_bytes, active_bytes); + astats->allocated_large += active_bytes; + } else { + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); + } } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, @@ -315,6 +325,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); + if (config_limit_usize_gap) { + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -338,6 +353,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); + if (config_limit_usize_gap) { + locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -802,7 +822,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { - usize = sz_index2size(alloc_ctx.szind); + usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* 
Remove large allocation from prof sample set. */ @@ -1346,7 +1366,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); } else { - return large_malloc(tsdn, arena, sz_index2size(ind), zero); + return large_malloc(tsdn, arena, sz_s2u(size), zero); } } diff --git a/src/ctl.c b/src/ctl.c index 1ebcbf8e..73d4cb66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -168,6 +168,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) +CTL_PROTO(opt_limit_usize_gap) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) CTL_PROTO(opt_malloc_conf_global_var) @@ -557,6 +558,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, + {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2341,6 +2343,8 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) +CTL_RO_NL_CGEN(config_limit_usize_gap, opt_limit_usize_gap, opt_limit_usize_gap, + bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, @@ -3364,8 +3368,8 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_lextent_i_size, + sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/eset.c b/src/eset.c index 6f8f335e..7dc9cce7 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,6 +155,71 @@ 
eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +edata_t * +eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, + size_t alignment) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); + if (candidate_size < size) { + continue; + } + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= size) { + return edata; + } + } + + return NULL; +} + +edata_t * +eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, + bool exact_only, edata_cmp_summary_t *ret_summ) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *ret = NULL, *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + if ((!exact_only && edata_size_get(edata) >= size) || + (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = + edata_cmp_summary_get(edata); + if (ret == NULL || edata_cmp_summary_comp(temp_summ, + *ret_summ) < 0) { + ret = edata; + *ret_summ = temp_summ; + } + } + } + + return ret; +} + /* * Find an extent with size [min_size, max_size) to satisfy the alignment 
* requirement. For each size, try only the first extent in the heap. @@ -162,8 +227,19 @@ eset_remove(eset_t *eset, edata_t *edata) { static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + /* See comments in eset_first_fit for why we enumerate search below. */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + edata_t *ret = NULL; + ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, + alignment); + if (ret != NULL) { + return ret; + } + } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); @@ -211,8 +287,43 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { + if (sz_limit_usize_gap_enabled()) { + pszind_t pind_prev = + sz_psz2ind(sz_psz_quantize_floor(size)); + return eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ true, &ret_summ); + } else { + return edata_heap_empty(&eset->bins[pind].heap) ? NULL: + edata_heap_first(&eset->bins[pind].heap); + } - return edata_heap_empty(&eset->bins[pind].heap) ? NULL : - edata_heap_first(&eset->bins[pind].heap); + } + + /* + * Each element in the eset->bins is a heap corresponding to a size + * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * pind (including pind itself) will surely satisfy the requests while + * heaps before pind cannot satisfy the request because usize is + * calculated based on size classes then. However, when + * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling + * user requested size to the closest multiple of PAGE. 
This means in + * the heap before pind, i.e., pind_prev, there may exist extents able + * to satisfy the request and we should enumerate the heap when + * pind_prev != pind. + * + * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, + * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * pind points to the heap containing extents ranging in + * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss + * any candidates. When sz_limit_usize_gap_enabled() is true, the + * usize would be 1MB + 4KB and pind still points to the same heap. + * In this case, the heap pind_prev points to, which contains extents + * in the range [1MB, 1.25MB), may contain candidates satisfying the + * usize and thus should be enumerated. + */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + ret = eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ false, &ret_summ); } for (pszind_t i = diff --git a/src/hpa.c b/src/hpa.c index 932cf201..2a5d7e1f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -706,7 +706,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *deferred_work_generated) { assert(size <= HUGEPAGE); assert(size <= shard->opts.slab_max_alloc || - size == sz_index2size(sz_size2index(size))); + size == sz_s2u(size)); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, diff --git a/src/jemalloc.c b/src/jemalloc.c index 31d4cb27..67456bb7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,6 +123,13 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); +bool opt_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true; +#else + false; +#endif + const char *const zero_realloc_mode_names[] = { "alloc", "free", @@ -1578,8 +1585,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); + "hpa_sec_max_alloc", PAGE, USIZE_GROW_SLOW_THRESHOLD, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1763,6 +1770,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + if (config_limit_usize_gap) { + CONF_HANDLE_BOOL(opt_limit_usize_gap, + "limit_usize_gap"); + } + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE @@ -2182,6 +2194,17 @@ static bool malloc_init_hard(void) { tsd_t *tsd; + if (config_limit_usize_gap) { + assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + /* + * This asserts an extreme case where TINY_MAXCLASS is larger + * than LARGE_MINCLASS. It could only happen if some constants + * are configured miserably wrong. + */ + assert(SC_LG_TINY_MAXCLASS <= + (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + } #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif @@ -2376,7 +2399,8 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_index2size(*ind); + *usize = sz_limit_usize_gap_enabled()? 
sz_s2u(size): + sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2924,7 +2948,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } @@ -2956,35 +2980,41 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; + szind_t szind = sz_size2index(usize); if (!config_prof) { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } else if (opt_prof) { + /* + * Small sampled allocs promoted can still get correct + * usize here. Check comments in edata_usize_get. + */ emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ + size_t true_size = + emap_alloc_ctx_usize_get(&alloc_ctx); if (unlikely(alloc_ctx.szind != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size( - alloc_ctx.szind), + /* true_size */ true_size, /* input_size */ usize); } } } else { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3486,7 +3516,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; @@ -3756,7 +3786,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + diff --git a/src/prof_data.c b/src/prof_data.c index 39af0c90..437673ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -513,7 +513,13 @@ void prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); + /* + * When limit_usize_gap is enabled, the unbiased calculation + * here is not as accurate as it was because usize now changes + * in a finer grain while the unbiased_sz is still calculated + * using the old way. 
+ */ + double sz = (double)sz_index2size_unsafe(i); double rate = (double)(ZU(1) << lg_prof_sample); double div_val = 1.0 - exp(-sz / rate); double unbiased_sz = sz / div_val; diff --git a/src/psset.c b/src/psset.c index 9a833193..e617f426 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,18 +337,50 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } +hpdata_t * +psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + return NULL; + } + + hpdata_t *ps = NULL; + hpdata_age_heap_enumerate_helper_t helper; + hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, + PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + + while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], + &helper))) { + if (hpdata_longest_free_range_get(ps) >= size) { + return ps; + } + } + + return NULL; +} + hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + hpdata_t *ps = NULL; + + /* See comments in eset_first_fit for why we enumerate search below. 
*/ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + ps = psset_enumerate_search(psset, pind_prev, size); + if (ps != NULL) { + return ps; + } + } + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } - hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); + ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } diff --git a/src/sec.c b/src/sec.c index 19d69ff4..8827d1bd 100644 --- a/src/sec.c +++ b/src/sec.c @@ -24,6 +24,13 @@ bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { assert(opts->max_alloc >= PAGE); + /* + * Same as tcache, sec do not cache allocs/dallocs larger than + * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases + * by PAGE and the number of usizes is too large. + */ + assert(!sz_limit_usize_gap_enabled() || + opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; diff --git a/src/tcache.c b/src/tcache.c index 15da14da..270d38ac 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1047,7 +1047,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + sz_index2size(binind))) { /* See the comment in isfree. 
*/ continue; } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 68b8f381..85d9238b 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_grow_and_shrink) { size_t tsz; #define NCYCLES 3 unsigned i, j; -#define NSZS 1024 +#define NSZS 64 size_t szs[NSZS]; #define MAXSZ ZU(12 * 1024 * 1024) diff --git a/test/test.sh.in b/test/test.sh.in index b4fbb355..a4ee9396 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,6 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ + limit_usize_gap=@limit_usize_gap@ \ . @srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8ef0786c..09536b29 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,7 +78,8 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return sz_index2size(full_alloc_ctx.szind); + return config_limit_usize_gap? 
edata_usize_get(full_alloc_ctx.edata): + sz_index2size(full_alloc_ctx.szind); } static unsigned diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 50b96a87..6c42729a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -5,7 +5,7 @@ #define SHARD_IND 111 -#define ALLOC_MAX (HUGEPAGE / 4) +#define ALLOC_MAX (HUGEPAGE) typedef struct test_data_s test_data_t; struct test_data_s { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 02fedaa7..296b7bff 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -332,6 +332,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); + TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/ph.c b/test/unit/ph.c index 28f5e488..0339f993 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -2,8 +2,9 @@ #include "jemalloc/internal/ph.h" +#define BFS_ENUMERATE_MAX 30 typedef struct node_s node_t; -ph_structs(heap, node_t); +ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e @@ -239,6 +240,22 @@ TEST_BEGIN(test_ph_random) { expect_false(heap_empty(&heap), "Heap should not be empty"); + /* Enumerate nodes. */ + heap_enumerate_helper_t helper; + uint16_t max_queue_size = sizeof(helper.bfs_queue) + / sizeof(void *); + expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, + "Incorrect bfs queue length initialized"); + assert(max_queue_size == BFS_ENUMERATE_MAX); + heap_enumerate_prepare(&heap, &helper, + BFS_ENUMERATE_MAX, max_queue_size); + size_t node_count = 0; + while(heap_enumerate_next(&heap, &helper)) { + node_count ++; + } + expect_lu_eq(node_count, j, + "Unexpected enumeration results."); + /* Remove nodes. 
*/ switch (i % 6) { case 0: diff --git a/test/unit/sec.c b/test/unit/sec.c index 0b5e1c31..cfef043f 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -412,7 +412,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, + test_sec_init(&sec, &ta.pai, /* nshards */ 1, + /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 9e8a408f..24913803 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,8 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = get_max_size_class(); + max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + get_max_size_class(); max_index = sz_size2index(max_size_class); for (index = 0, size_class = sz_index2size(index); index < max_index || @@ -79,6 +80,40 @@ TEST_BEGIN(test_size_classes) { } TEST_END +TEST_BEGIN(test_grow_slow_size_classes) { + test_skip_if(!sz_limit_usize_gap_enabled()); + + size_t size = SC_LARGE_MINCLASS; + size_t target_usize = SC_LARGE_MINCLASS; + size_t max_size = get_max_size_class(); + size_t increase[3] = {PAGE - 1, 1, 1}; + while (size <= max_size) { + size_t usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[0]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[1]; + usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[2]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize 
as expected."); + if (target_usize << 1 < target_usize) { + break; + } + target_usize = target_usize << 1; + size = target_usize; + } +} +TEST_END + TEST_BEGIN(test_psize_classes) { size_t size_class, max_psz; pszind_t pind, max_pind; @@ -182,6 +217,7 @@ int main(void) { return test( test_size_classes, + test_grow_slow_size_classes, test_psize_classes, test_overflow); } diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh new file mode 100644 index 00000000..93d5e8d1 --- /dev/null +++ b/test/unit/size_classes.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${limit_usize_gap}" = "x1" ] ; then + export MALLOC_CONF="limit_usize_gap:true" +fi diff --git a/test/unit/stats.c b/test/unit/stats.c index 203a71b5..584a582f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -202,17 +202,22 @@ TEST_END TEST_BEGIN(test_stats_arenas_large) { void *p; - size_t sz, allocated; + size_t sz, allocated, allocated_before; uint64_t epoch, nmalloc, ndalloc; + size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; int expected = config_stats ? 
0 : ENOENT; - p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); + sz = sizeof(size_t); + expect_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated_before, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + p = mallocx(malloc_size, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -223,8 +228,10 @@ TEST_BEGIN(test_stats_arenas_large) { &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, + expect_zu_ge(allocated_before, 0, "allocated should be greater than zero"); + expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size), + "the diff between allocated should be greater than the allocation made"); expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, From 6035d4a8d369d158ca299c10773e05796e1d18ad Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 27 Jan 2025 23:09:51 -0800 Subject: [PATCH 263/395] Cache extra extents in the dirty pool from ecache_alloc_grow --- include/jemalloc/internal/sz.h | 19 +++++--- src/pac.c | 80 ++++++++++++++++++++++++++++++++-- test/unit/arena_decay.c | 9 +++- 3 files changed, 96 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6c0a1f0c..1122461c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -311,6 +311,17 @@ sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { } } +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_compute_using_delta(size_t size) { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - SC_LG_NGROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; +} + JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { @@ -329,13 +340,7 @@ sz_s2u_compute(size_t size) { } #endif if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return usize; + return sz_s2u_compute_using_delta(size); } else { /* * With sz_limit_usize_gap_enabled() == true, usize of a large diff --git a/src/pac.c b/src/pac.c index 57a0c953..3523ef3d 100644 --- a/src/pac.c +++ b/src/pac.c @@ -112,10 +112,27 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } +size_t pac_alloc_retained_batched_size(size_t size) { + if (size > SC_LARGE_MAXCLASS) { + /* + * A valid input with usize SC_LARGE_MAXCLASS could still + * reach here because of sz_large_pad. Such a request is valid + * but we should not further increase it. Thus, directly + * return size for such cases. + */ + return size; + } + size_t batched_size = sz_s2u_compute_using_delta(size); + size_t next_hugepage_size = HUGEPAGE_CEILING(size); + return batched_size > next_hugepage_size? 
next_hugepage_size: +batched_size; +} + static edata_t * pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool guarded) { assert(!guarded || alignment <= PAGE); + size_t newly_mapped_size = 0; edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, NULL, size, alignment, zero, guarded); @@ -124,14 +141,69 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, NULL, size, alignment, zero, guarded); } + + /* + * We batch-allocate a larger extent when limit_usize_gap is enabled + * because the reuse of extents in the dirty pool is worse without size + * classes for large allocs. For instance, when limit_usize_gap is not + * enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to + * 1.25MB and can reuse the same buffer if they are alloc & dalloc + * sequentially. However, with limit_usize_gap enabled, they cannot + * reuse the same buffer and their sequential allocs & dallocs will + * result in three different extents. Thus, we cache extra mergeable + * extents in the dirty pool to improve the reuse. We skip this + * optimization if both maps_coalesce and opt_retain are disabled + * because VM is not cheap enough to be used aggressively and extents + * cannot be merged at will (only extents from the same VirtualAlloc + * can be merged). Note that it could still be risky to cache more + * extents when either maps_coalesce or opt_retain is enabled. Yet + * doing so is still beneficial in improving the reuse of extents + * with some limits. This choice should be reevaluated if + * pac_alloc_retained_batched_size is changed to be more aggressive. 
+ */ + if (sz_limit_usize_gap_enabled() && edata == NULL && + (maps_coalesce || opt_retain)) { + size_t batched_size = pac_alloc_retained_batched_size(size); + /* + * Note that ecache_alloc_grow will try to retrieve virtual + * memory from both retained pool and directly from OS through + * extent_alloc_wrapper if the retained pool has no qualified + * extents. This is also why the overcaching still works even + * with opt_retain off. + */ + edata = ecache_alloc_grow(tsdn, pac, ehooks, + &pac->ecache_retained, NULL, batched_size, + alignment, zero, guarded); + + if (edata != NULL && batched_size > size) { + edata_t *trail = extent_split_wrapper(tsdn, pac, + ehooks, edata, size, batched_size - size, + /* holding_core_locks */ false); + if (trail == NULL) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_retained, edata); + edata = NULL; + } else { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_dirty, trail); + } + } + + if (edata != NULL) { + newly_mapped_size = batched_size; + } + } + if (edata == NULL) { edata = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, NULL, size, alignment, zero, guarded); - if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, size, - ATOMIC_RELAXED); - } + newly_mapped_size = size; + } + + if (config_stats && newly_mapped_size != 0) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, + newly_mapped_size, ATOMIC_RELAXED); } return edata; diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 10d1a6b1..00a38326 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -410,7 +410,14 @@ TEST_BEGIN(test_decay_never) { /* Verify that each deallocation generates additional dirty pages. */ size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); - expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + /* + * With limit_usize_gap enabled, some more extents + * are cached in the dirty pool, making the assumption below + * not true. 
+ */ + if (!sz_limit_usize_gap_enabled()) { + expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { dallocx(ptrs[i], flags); From 70f019cd3abc5dfc67df1b8a2c460bc5e8221ae2 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 14 May 2024 14:27:17 -0700 Subject: [PATCH 264/395] Enable limit-usize-gap in CI tests. Considering the new usize calculation will be default soon, add the config option in for Travis, Cirrus and appveyor. --- .appveyor.yml | 12 ++- .cirrus.yml | 2 +- .travis.yml | 218 +++++++++++++++++++++--------------------- scripts/gen_travis.py | 1 + 4 files changed, 119 insertions(+), 114 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index d31f9aed..dedc7867 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,27 +5,31 @@ environment: - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 + CONFIG_FLAGS: --enable-limit-usize-gap install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% diff --git a/.cirrus.yml b/.cirrus.yml index 585aa42f..8051272c 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,7 +40,7 @@ task: # We don't perfectly track freebsd stdlib.h definitions. 
This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} + - ./configure --with-jemalloc-prefix=ci_ --enable-limit-usize-gap ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests diff --git a/.travis.yml b/.travis.yml index ceda8989..a32755c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,331 +12,331 @@ jobs: include: - os: windows arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + env: CC=cl.exe CXX=cl.exe 
CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" - os: linux arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" 
- os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false 
--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux 
arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true --enable-limit-usize-gap" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: 
linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - 
env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 43457967..a49bb83b 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -191,6 +191,7 @@ def format_job(os, arch, combination): if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) + configure_flags.append('--enable-limit-usize-gap') if not compilers: compiler = GCC.value else: From 22440a0207cd7d7c624c78723ca1eeb8a4353e79 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 4 Feb 2025 18:31:11 -0800 Subject: [PATCH 265/395] Implement process_madvise support. Add opt.process_madvise_max_batch which determines if process_madvise is enabled (non-zero) and the max # of regions in each batch. Added another limiting factor which is the space to reserve on stack, which results in the max batch of 128. 
--- configure.ac | 11 +++ include/jemalloc/internal/extent.h | 12 ++++ .../internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 7 ++ include/jemalloc/internal/pages.h | 1 + include/jemalloc/internal/typed_list.h | 4 ++ src/ctl.c | 4 ++ src/extent.c | 36 ++++++++-- src/jemalloc.c | 5 ++ src/pac.c | 67 ++++++++++++++++++- src/pages.c | 58 ++++++++++++++++ src/stats.c | 1 + test/unit/mallctl.c | 1 + 13 files changed, 204 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index a55a5a08..eb500db9 100644 --- a/configure.ac +++ b/configure.ac @@ -2544,6 +2544,17 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_collapse}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ]) fi + + dnl Check for process_madvise + JE_COMPILABLE([process_madvise(2)], [ +#include +#include +], [ + syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); +], [je_cv_process_madvise]) + if test "x${je_cv_process_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + fi else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 17feb703..be61db8d 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,6 +21,16 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; +#define PROCESS_MADVISE_MAX_BATCH_DEFAULT 0 +extern size_t opt_process_madvise_max_batch; + +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +/* The iovec is on stack. Limit the max batch to avoid stack overflow. 
*/ +#define PROCESS_MADVISE_MAX_BATCH_LIMIT (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) +#else +#define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 +#endif + edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool guarded); @@ -42,6 +52,8 @@ edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool growing_retained); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e76eaaf4..2e47438a 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -345,6 +345,9 @@ */ #undef JEMALLOC_MADVISE_NOCORE +/* Defined if process_madvise(2) is available. */ +#undef JEMALLOC_HAVE_PROCESS_MADVISE + /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ef637a2d..eba475a6 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -87,6 +87,13 @@ static const bool have_madvise_huge = false #endif ; +static const bool have_process_madvise = +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 0dcf96dc..366bc30b 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -121,6 +121,7 @@ bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); +bool pages_purge_process_madvise(void *vec, size_t ven_len, size_t total_bytes); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); bool pages_collapse(void *addr, size_t size); diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 6535055a..7c4826fc 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -22,6 +22,10 @@ static inline el_type * \ list_type##_last(const list_type##_t *list) { \ return ql_last(&list->head, linkage); \ } \ +static inline el_type * \ +list_type##_next(const list_type##_t *list, el_type *item) { \ + return ql_next(&list->head, item, linkage); \ +} \ static inline void \ list_type##_append(list_type##_t *list, el_type *item) { \ ql_elm_new(item, linkage); \ diff --git a/src/ctl.c b/src/ctl.c index 73d4cb66..c55d9719 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -169,6 +169,7 @@ CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) CTL_PROTO(opt_limit_usize_gap) 
+CTL_PROTO(opt_process_madvise_max_batch) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) CTL_PROTO(opt_malloc_conf_global_var) @@ -559,6 +560,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, + {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2316,6 +2318,8 @@ CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) +CTL_RO_NL_GEN(opt_process_madvise_max_batch, opt_process_madvise_max_batch, + size_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/extent.c b/src/extent.c index 30942491..e61b7f9c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -12,6 +12,13 @@ /* Data. 
*/ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; +size_t opt_process_madvise_max_batch = +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + PROCESS_MADVISE_MAX_BATCH_DEFAULT; +#else + 0 +#endif + ; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -1032,6 +1039,29 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } +static void +extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata) { + if (config_prof) { + extent_gdump_sub(tsdn, edata); + } + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); +} + +void +extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Verify that will not go down the dalloc / munmap route. */ + assert(ehooks_dalloc_will_fail(ehooks)); + + edata_zeroed_set(edata, true); + extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata); +} + void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { @@ -1077,11 +1107,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_zeroed_set(edata, zeroed); - if (config_prof) { - extent_gdump_sub(tsdn, edata); - } - - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); + extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata); } void diff --git a/src/jemalloc.c b/src/jemalloc.c index 67456bb7..9f4bc785 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1361,6 +1361,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? 
NSTIME_SEC_MAX * KQU(1000) : SSIZE_MAX); + CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, + "process_madvise_max_batch", 0, + PROCESS_MADVISE_MAX_BATCH_LIMIT, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_opts(v, vlen, diff --git a/src/pac.c b/src/pac.c index 3523ef3d..12c1e444 100644 --- a/src/pac.c +++ b/src/pac.c @@ -435,6 +435,44 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, return nstashed; } +static bool +decay_with_process_madvise(edata_list_inactive_t *decay_extents) { + cassert(have_process_madvise); + assert(opt_process_madvise_max_batch > 0); +#ifndef JEMALLOC_HAVE_PROCESS_MADVISE + return true; +#else + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + size_t len = opt_process_madvise_max_batch; + VARIABLE_ARRAY(struct iovec, vec, len); + + size_t cur = 0, total_bytes = 0; + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; + edata = edata_list_inactive_next(decay_extents, edata)) { + size_t pages_bytes = edata_size_get(edata); + vec[cur].iov_base = edata_base_get(edata); + vec[cur].iov_len = pages_bytes; + total_bytes += pages_bytes; + cur++; + if (cur == len) { + bool err = pages_purge_process_madvise(vec, len, + total_bytes); + if (err) { + return true; + } + cur = 0; + total_bytes = 0; + } + } + if (cur > 0) { + return pages_purge_process_madvise(vec, cur, total_bytes); + } + return false; +#endif +} + static size_t pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, @@ -450,6 +488,28 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; + bool purge_to_retained = !try_muzzy || + ecache->state == extent_state_muzzy; + /* + * Attempt process_madvise only if 1) enabled, 2) purging to retained, + * 
and 3) not using custom hooks. + */ + bool try_process_madvise = (opt_process_madvise_max_batch > 0) && + purge_to_retained && ehooks_dalloc_will_fail(ehooks); + + bool already_purged; + if (try_process_madvise) { + /* + * If anything unexpected happened during process_madvise + * (e.g. not supporting MADV_DONTNEED, or partial success for + * some reason), we will consider nothing is purged and fallback + * to the regular madvise. + */ + already_purged = !decay_with_process_madvise(decay_extents); + } else { + already_purged = false; + } + for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); @@ -473,7 +533,12 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + if (already_purged) { + extent_dalloc_wrapper_purged(tsdn, pac, ehooks, + edata); + } else { + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + } nunmapped += npages; break; case extent_state_active: diff --git a/src/pages.c b/src/pages.c index 26fd8d5d..babfd50f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -617,6 +617,58 @@ pages_dodump(void *addr, size_t size) { #endif } +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +#include +#include +static int pidfd; + +static bool +init_process_madvise(void) { + if (opt_process_madvise_max_batch == 0) { + return false; + } + + if (opt_process_madvise_max_batch > PROCESS_MADVISE_MAX_BATCH_LIMIT) { + opt_process_madvise_max_batch = PROCESS_MADVISE_MAX_BATCH_LIMIT; + } + pid_t pid = getpid(); + pidfd = syscall(SYS_pidfd_open, pid, 0); + if (pidfd == -1) { + return true; + } + + return false; +} + +static bool +pages_purge_process_madvise_impl(void *vec, size_t vec_len, + size_t total_bytes) { + size_t purged_bytes = (size_t)syscall(SYS_process_madvise, pidfd, + (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + + return purged_bytes != 
total_bytes; +} + +#else + +static bool +init_process_madvise(void) { + return false; +} + +static bool +pages_purge_process_madvise_impl(void *vec, size_t vec_len, + size_t total_bytes) { + not_reached(); + return true; +} + +#endif + +bool +pages_purge_process_madvise(void *vec, size_t vec_len, size_t total_bytes) { + return pages_purge_process_madvise_impl(vec, vec_len, total_bytes); +} static size_t os_page_detect(void) { @@ -833,6 +885,12 @@ pages_boot(void) { os_pages_unmap(madv_free_page, PAGE); } #endif + if (init_process_madvise()) { + if (opt_abort) { + abort(); + } + return true; + } return false; } diff --git a/src/stats.c b/src/stats.c index b28b9942..58874bf8 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1727,6 +1727,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_INT64("stats_interval") OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") + OPT_WRITE_SIZE_T("process_madvise_max_batch") emitter_dict_end(emitter); /* Close "opt". */ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 296b7bff..57aa59e5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,6 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); + TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT } From ad108d50f1c30700389103ff5fe3ef5f538f804c Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 21 Jan 2025 07:20:15 -0800 Subject: [PATCH 266/395] Extend purging algorithm with peak demand tracking Implementation inspired by idea described in "Beyond malloc efficiency to fleet efficiency: a hugepage-aware memory allocator" paper [1]. Primary idea is to track maximum number (peak) of active pages in use with sliding window and then use this number to decide how many dirty pages we would like to keep. 
We are trying to estimate maximum amount of active memory we'll need in the near future. We do so by projecting future active memory demand (based on peak active memory usage we observed in the past within sliding window) and adding slack on top of it (an overhead is reasonable to have in exchange for higher hugepages coverage). When peak demand tracking is off, projection of future active memory is active memory we are having right now. Estimation is essentially the same as `nactive_max * (1 + dirty_mult)`. Peak demand purging algorithm is controlled by two config options. Option `hpa_peak_demand_window_ms` controls duration of sliding window we track maximum active memory usage in and option `hpa_dirty_mult` controls amount of slack we are allowed to have as a percentage of maximum active memory usage. By default `hpa_peak_demand_window_ms == 0` now and we have the same behaviour (ratio based purging) that we had before this commit. [1]: https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf --- Makefile.in | 2 + include/jemalloc/internal/hpa.h | 4 + include/jemalloc/internal/hpa_opts.h | 14 +- include/jemalloc/internal/peak_demand.h | 55 ++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 5 + src/hpa.c | 53 +++++- src/jemalloc.c | 5 + src/peak_demand.c | 74 ++++++++ src/stats.c | 1 + test/unit/hpa.c | 174 +++++++++++++++--- test/unit/mallctl.c | 1 + test/unit/peak_demand.c | 162 ++++++++++++++++ 20 files changed, 537 insertions(+), 29 deletions(-) create mode 100644 include/jemalloc/internal/peak_demand.h create mode 100644 src/peak_demand.c create mode 100644 
test/unit/peak_demand.c diff --git a/Makefile.in b/Makefile.in index 1914fc28..b4102d0b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,6 +137,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ + $(srcroot)src/peak_demand.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ @@ -252,6 +253,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ + $(srcroot)test/unit/peak_demand.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index d788d051..a384d04a 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/peak_demand.h" #include "jemalloc/internal/psset.h" typedef struct hpa_central_s hpa_central_t; @@ -147,6 +148,9 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; + + /* Peak active memory sliding window statistics. */ + peak_demand_t peak_demand; }; bool hpa_hugepage_size_exceeds_limit(); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 42246172..816bb577 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -27,7 +27,8 @@ struct hpa_shard_opts_s { /* * The HPA purges whenever the number of pages exceeds dirty_mult * - * active_pages. This may be set to (fxp_t)-1 to disable purging. + * peak_active_pages. This may be set to (fxp_t)-1 to disable + * purging. */ fxp_t dirty_mult; @@ -59,6 +60,13 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. 
*/ ssize_t experimental_max_purge_nhp; + + /* + * Sliding window duration to track active memory demand statistics. + * This might be set to 0, to disable sliding window statistics + * tracking and use current number of active pages for purging instead. + */ + uint64_t peak_demand_window_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -83,7 +91,9 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1 \ + -1, \ + /* peak_demand_window_ms */ \ + 0 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h new file mode 100644 index 00000000..2664cbec --- /dev/null +++ b/include/jemalloc/internal/peak_demand.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H +#define JEMALLOC_INTERNAL_PEAK_DEMAND_H + +#include "jemalloc/internal/jemalloc_preamble.h" + +/* + * Implementation of peak active memory demand tracking. + * + * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware + * memory allocator" whitepaper. + * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf + * + * End goal is to track peak active memory usage over specified time interval. + * We do so by dividing this time interval into disjoint subintervals and + * storing value of maximum memory usage for each subinterval in a circular + * buffer. Nanoseconds resolution timestamp uniquely maps into epoch, which is + * used as an index to access circular buffer. + */ + +#define PEAK_DEMAND_LG_BUCKETS 4 +/* + * Number of buckets should be power of 2 to ensure modulo operation is + * optimized to bit masking by the compiler. + */ +#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS) + +typedef struct peak_demand_s peak_demand_t; +struct peak_demand_s { + /* + * Absolute value of current epoch, monotonically increases over time. Epoch + * value modulo number of buckets used as an index to access nactive_max + * array. 
+ */ + uint64_t epoch; + + /* How many nanoseconds each epoch approximately takes. */ + uint64_t epoch_interval_ns; + + /* + * Circular buffer to track maximum number of active pages for each + * epoch. + */ + size_t nactive_max[PEAK_DEMAND_NBUCKETS]; +}; + +void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms); + +/* Updates peak demand statistics with current number of active pages. */ +void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive); + +/* Returns maximum number of active pages in sliding window. */ +size_t peak_demand_nactive_max(peak_demand_t *peak_demand); + +#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..97a95fbf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..8529438c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..eace48ba 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..98085cfd 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index c55d9719..2c941ae8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,6 +106,7 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) +CTL_PROTO(opt_hpa_peak_demand_window_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -487,6 +488,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_peak_demand_window_ms"), + CTL(opt_hpa_peak_demand_window_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), 
CTL(opt_hpa_sec_max_alloc)}, @@ -2255,6 +2258,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) +CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms, + opt_hpa_opts.peak_demand_window_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index 2a5d7e1f..c01dde13 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -63,6 +63,11 @@ hpa_supported(void) { return true; } +static bool +hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) { + return shard->opts.peak_demand_window_ms > 0; +} + static void hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); @@ -217,6 +222,11 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; + if (hpa_peak_demand_tracking_enabled(shard)) { + peak_demand_init(&shard->peak_demand, + shard->opts.peak_demand_window_ms); + } + /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just @@ -294,8 +304,37 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - return fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); + /* + * We are trying to estimate maximum amount of active memory we'll + * need in the near future. We do so by projecting future active + * memory demand (based on peak active memory usage we observed in the + * past within sliding window) and adding slack on top of it (an + * overhead is reasonable to have in exchange of higher hugepages + * coverage). When peak demand tracking is off, projection of future + * active memory is active memory we are having right now. 
+ * + * Estimation is essentially the same as nactive_max * (1 + + * dirty_mult), but expressed differently to factor in necessary + * implementation details. + */ + size_t nactive = psset_nactive(&shard->psset); + size_t nactive_max = nactive; + if (hpa_peak_demand_tracking_enabled(shard)) { + /* + * We release shard->mtx, when we do a syscall to purge dirty + * memory, so someone might grab shard->mtx, allocate memory + * from this shard and update psset's nactive counter, before + * peak_demand_update(...) was called and we'll get + * peak_demand_nactive_max(...) <= nactive as a result. + */ + size_t peak = peak_demand_nactive_max(&shard->peak_demand); + if (peak > nactive_max) { + nactive_max = peak; + } + } + size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult); + size_t estimation = nactive_max + slack; + return estimation - nactive; } static bool @@ -548,6 +587,16 @@ static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + + /* Update active memory demand statistics. 
*/ + if (hpa_peak_demand_tracking_enabled(shard)) { + nstime_t now; + shard->central->hooks.curtime(&now, + /* first_reading */ true); + peak_demand_update(&shard->peak_demand, &now, + psset_nactive(&shard->psset)); + } + if (!forced && shard->opts.deferral_allowed) { return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f4bc785..d08771f8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1568,6 +1568,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + CONF_HANDLE_UINT64_T( + opt_hpa_opts.peak_demand_window_ms, + "hpa_peak_demand_window_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/peak_demand.c b/src/peak_demand.c new file mode 100644 index 00000000..49f28930 --- /dev/null +++ b/src/peak_demand.c @@ -0,0 +1,74 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/peak_demand.h" + +void +peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) { + assert(interval_ms > 0); + peak_demand->epoch = 0; + uint64_t interval_ns = interval_ms * 1000 * 1000; + peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS; + memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max)); +} + +static uint64_t +peak_demand_epoch_ind(peak_demand_t *peak_demand) { + return peak_demand->epoch % PEAK_DEMAND_NBUCKETS; +} + +static nstime_t +peak_demand_next_epoch_advance(peak_demand_t *peak_demand) { + uint64_t epoch = peak_demand->epoch; + uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns; + nstime_t next; + nstime_init(&next, ns); + return next; +} + +static uint64_t +peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand, + const nstime_t *now) { + nstime_t next_epoch_advance = + 
peak_demand_next_epoch_advance(peak_demand); + if (nstime_compare(now, &next_epoch_advance) < 0) { + return peak_demand_epoch_ind(peak_demand); + } + uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns; + assert(next_epoch > peak_demand->epoch); + /* + * If we missed more epochs, than capacity of circular buffer + * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS + * items as we don't want to zero out same item multiple times. + */ + if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) { + peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS; + } + while (peak_demand->epoch < next_epoch) { + ++peak_demand->epoch; + uint64_t ind = peak_demand_epoch_ind(peak_demand); + peak_demand->nactive_max[ind] = 0; + } + return peak_demand_epoch_ind(peak_demand); +} + +void +peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive) { + uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now); + size_t *epoch_nactive = &peak_demand->nactive_max[ind]; + if (nactive > *epoch_nactive) { + *epoch_nactive = nactive; + } +} + +size_t +peak_demand_nactive_max(peak_demand_t *peak_demand) { + size_t nactive_max = peak_demand->nactive_max[0]; + for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) { + if (peak_demand->nactive_max[i] > nactive_max) { + nactive_max = peak_demand->nactive_max[i]; + } + } + return nactive_max; +} diff --git a/src/stats.c b/src/stats.c index 58874bf8..bd0167fb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1657,6 +1657,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") + OPT_WRITE_UINT64("hpa_peak_demand_window_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 6c42729a..ceed9bd8 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,26 +37,9 @@ static hpa_shard_opts_t 
test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1 -}; - -static hpa_shard_opts_t test_hpa_shard_opts_purge = { - /* slab_max_alloc */ - HUGEPAGE, - /* hugification_threshold */ - 0.9 * HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(11), - /* deferral_allowed */ - true, - /* hugify_delay_ms */ - 0, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 + -1, + /* peak_demand_window_ms */ + 0 }; static hpa_shard_t * @@ -480,8 +463,14 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_purge); + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.slab_max_alloc = HUGEPAGE; + opts.hugification_threshold = 0.9 * HUGEPAGE; + opts.dirty_mult = FXP_INIT_PERCENT(11); + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -489,8 +478,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. 
*/ - const size_t npages = - test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t npages = opts.hugification_threshold / PAGE + 1; const size_t size = npages * PAGE; bool deferred_work_generated = false; @@ -733,6 +721,140 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } TEST_END +TEST_BEGIN(test_demand_purge_slack) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* Allow 10% of slack. */ + opts.dirty_mult = FXP_INIT_PERCENT(10); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. */ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, so we don't need + * to purge anything just yet. 
+ */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages, we allowed to + * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we + * have 5 dirty hugepages, so we should purge 4 of them. + */ + expect_zu_eq(4, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_demand_purge_tight) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* No slack allowed. */ + opts.dirty_mult = FXP_INIT_PERCENT(0); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. 
*/ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, so we don't need + * to purge anything just yet. + */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages. We have + * hpa_dirty_mult = 0, so we are allowed to keep 11 * 0 = 0 dirty + * hugepages, but we have 5, all of them should be purged. + */ + expect_zu_eq(5, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -756,5 +878,7 @@ main(void) { test_no_min_purge_interval, test_min_purge_interval, test_purge, - test_experimental_max_purge_nhp); + test_experimental_max_purge_nhp, + test_demand_purge_slack, + test_demand_purge_tight); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 57aa59e5..366b992b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -295,6 +295,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); + TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c new file mode 100644 index 
00000000..ca2506b8 --- /dev/null +++ b/test/unit/peak_demand.c @@ -0,0 +1,162 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/peak_demand.h" + +TEST_BEGIN(test_peak_demand_init) { + peak_demand_t peak_demand; + /* + * Exact value doesn't matter here as we don't advance epoch in this + * test. + */ + uint64_t interval_ms = 1000; + peak_demand_init(&peak_demand, interval_ms); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0, + "Unexpected ndirty_max value after initialization"); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_basic) { + peak_demand_t peak_demand; + /* Make each bucket exactly one second to simplify math. */ + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_skip_epochs) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1), + /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates are not evenly spread over time. When we update at + * 2 * (PEAK_DEMAND_NBUCKETS - 1) second, 1024 value is already out of + * sliding window, but 512 is still present. 
+ */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_rewrite_optimization) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX); + /* + * This update should take reasonable time if optimization is working + * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test + * will take a long time to finish. + */ + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_out_of_interval) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates frequency is lower than tracking interval, so we should + * have only last value. + */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_static_epoch) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + nstime_init_zero(&now); + + /* Big enough value to overwrite values in circular buffer. 
*/ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + /* + * We should override value in the same bucket as now value + * doesn't change between iterations. + */ + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_epoch_advance) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + /* Big enough value to overwrite values in circular buffer. */ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + uint64_t sec = nactive; + nstime_init2(&now, sec, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_peak_demand_init, + test_peak_demand_update_basic, + test_peak_demand_update_skip_epochs, + test_peak_demand_update_rewrite_optimization, + test_peak_demand_update_out_of_interval, + test_peak_demand_update_static_epoch, + test_peak_demand_update_epoch_advance); +} From 773b5809f9ab3f7c525badbe7587f8ab8ee20d41 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Wed, 5 Mar 2025 17:58:20 -0800 Subject: [PATCH 267/395] Fix frame pointer based unwinder to handle changing stack range --- include/jemalloc/internal/prof_sys.h | 2 +- src/prof_stack_range.c | 281 ++++++++++++++------------- src/prof_sys.c | 117 ++++++++--- 3 files changed, 233 insertions(+), 167 deletions(-) diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3377ba92..42284b38 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -20,7 +20,7 @@ void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool 
prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); -uintptr_t prof_thread_stack_start(uintptr_t stack_end); +int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); /* Used in unit tests. */ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 6a99b56f..f5e5c044 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,158 +4,163 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) && defined(JEMALLOC_HAVE_GETTID) +#if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID) -#include -#include -#include -#include // strtoul -#include -#include +# include +# include +# include +# include // strtoul +# include +# include -static int prof_mapping_containing_addr( - uintptr_t addr, - const char* maps_path, - uintptr_t* mm_start, - uintptr_t* mm_end) { - int ret = ENOENT; // not found - *mm_start = *mm_end = 0; - - // Each line of /proc//maps is: - // - - // - // The fields we care about are always within the first 34 characters so - // as long as `buf` contains the start of a mapping line it can always be - // parsed. 
- static const int kMappingFieldsWidth = 34; - - int fd = -1; - char buf[4096]; - ssize_t remaining = 0; // actual number of bytes read to buf - char* line = NULL; - - while (1) { - if (fd < 0) { - // case 0: initial open of maps file - fd = malloc_open(maps_path, O_RDONLY); - if (fd < 0) { - return errno; - } - - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = buf; - } else if (line == NULL) { - // case 1: no newline found in buf - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = memchr(buf, '\n', remaining); - if (line != NULL) { - line++; // advance to character after newline - remaining -= (line - buf); - } - } else if (line != NULL && remaining < kMappingFieldsWidth) { - // case 2: found newline but insufficient characters remaining in buf - - // fd currently points to the character immediately after the last - // character in buf. Seek fd to the character after the newline. - if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) { - ret = errno; - break; - } - - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = buf; - } else { - // case 3: found newline and sufficient characters to parse - - // parse - - char* tmp = line; - uintptr_t start_addr = strtoul(tmp, &tmp, 16); - if (addr >= start_addr) { - tmp++; // advance to character after '-' - uintptr_t end_addr = strtoul(tmp, &tmp, 16); - if (addr < end_addr) { - *mm_start = start_addr; - *mm_end = end_addr; - ret = 0; - break; +/* + * Converts a string representing a hexadecimal number to an unsigned long long + * integer. Functionally equivalent to strtoull() (for base 16) but faster for + * that case. + * + * @param nptr Pointer to the string to be converted. + * @param endptr Pointer to a pointer to character, which will be set to the + * character in `nptr` where parsing stopped. Can be NULL. + * @return The converted unsigned long long integer value. 
+ */ +static inline unsigned long long int +strtoull_hex(const char *nptr, char **endptr) { + unsigned long long int val = 0; + int ii = 0; + for (; ii < 16; ++ii) { + char c = nptr[ii]; + if (c >= '0' && c <= '9') { + val = (val << 4) + (c - '0'); + } else if (c >= 'a' && c <= 'f') { + val = (val << 4) + (c - 'a' + 10); + } else { + break; } - } - - // Advance to character after next newline in the current buf. - char* prev_line = line; - line = memchr(line, '\n', remaining); - if (line != NULL) { - line++; // advance to character after newline - remaining -= (line - prev_line); - } } - } - - malloc_close(fd); - return ret; + if (endptr) { + *endptr = (char *)(nptr + ii); + } + return val; } -static uintptr_t prof_main_thread_stack_start(const char* stat_path) { - uintptr_t stack_start = 0; +static int +prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, + uintptr_t *mm_start, uintptr_t *mm_end) { + int ret = ENOENT; /* not found */ + *mm_start = *mm_end = 0; - int fd = malloc_open(stat_path, O_RDONLY); - if (fd < 0) { - return 0; - } + /* + * Each line of /proc//maps is: + * - + * + * The fields we care about are always within the first 34 characters so + * as long as `buf` contains the start of a mapping line it can always be + * parsed. 
+ */ + static const int kMappingFieldsWidth = 34; - char buf[512]; - ssize_t n = malloc_read_fd(fd, buf, sizeof(buf) - 1); - if (n >= 0) { - buf[n] = '\0'; - if (sscanf( - buf, - "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %"FMTuPTR, - &stack_start) != 1) { + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; /* actual number of bytes read to buf */ + char *line = NULL; + + while (1) { + if (fd < 0) { + /* case 0: initial open of maps file */ + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = buf; + } else if (line == NULL) { + /* case 1: no newline found in buf */ + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + /* + * case 2: found newline but insufficient characters remaining in + * buf + */ + memcpy(buf, line, + remaining); /* copy remaining characters to start of buf */ + line = buf; + + size_t count = + malloc_read_fd(fd, buf + remaining, sizeof(buf) - remaining); + if (count <= 0) { + ret = errno; + break; + } + + remaining += count; /* actual number of bytes read to buf */ + } else { + /* case 3: found newline and sufficient characters to parse */ + + /* parse - */ + char *tmp = line; + uintptr_t start_addr = (uintptr_t)strtoull_hex(tmp, &tmp); + if (addr >= start_addr) { + tmp++; /* advance to character after '-' */ + uintptr_t end_addr = (uintptr_t)strtoull_hex(tmp, NULL); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } + + /* Advance to character after next newline in the current buf. 
*/ + char *prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - prev_line); + } + } } - } - malloc_close(fd); - return stack_start; + + malloc_close(fd); + return ret; } -uintptr_t prof_thread_stack_start(uintptr_t stack_end) { - pid_t pid = getpid(); - pid_t tid = gettid(); - if (pid == tid) { - char stat_path[32]; // "/proc//stat" - malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid); - return prof_main_thread_stack_start(stat_path); - } else { - // NOTE: Prior to kernel 4.5 an entry for every thread stack was included in - // /proc//maps as [STACK:]. Starting with kernel 4.5 only the main - // thread stack remains as the [stack] mapping. For other thread stacks the - // mapping is still visible in /proc//task//maps (though not - // labeled as [STACK:tid]). - // https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html - char maps_path[64]; // "/proc//task//maps" - malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid); - - uintptr_t mm_start, mm_end; - if (prof_mapping_containing_addr( - stack_end, maps_path, &mm_start, &mm_end) != 0) { - return 0; - } - return mm_end; - } +int +prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { + /* + * NOTE: Prior to kernel 4.5 an entry for every thread stack was included in + * /proc//maps as [STACK:]. Starting with kernel 4.5 only the main + * thread stack remains as the [stack] mapping. For other thread stacks the + * mapping is still visible in /proc//task//maps (though not + * labeled as [STACK:tid]). 
+ * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html + */ + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", + getpid(), gettid()); + return prof_mapping_containing_addr(fp, maps_path, low, high); } #else -uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) { - return 0; +int +prof_thread_stack_range( + UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { + *stack_start = *stack_end = 0; + return ENOENT; } -#endif // __linux__ +#endif // __linux__ diff --git a/src/prof_sys.c b/src/prof_sys.c index f0bc8b4b..642d8c89 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -23,6 +23,11 @@ #define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif +#ifdef JEMALLOC_PROF_FRAME_POINTER +// execinfo backtrace() as fallback unwinder +#include +#endif + /******************************************************************************/ malloc_mutex_t prof_dump_filename_mtx; @@ -102,41 +107,97 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { #elif (defined(JEMALLOC_PROF_FRAME_POINTER)) JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS + +struct stack_range { + uintptr_t start; + uintptr_t end; +}; + +struct thread_unwind_info { + struct stack_range stack_range; + bool fallback; +}; +static __thread struct thread_unwind_info unwind_info = { + .stack_range = { + .start = 0, + .end = 0, + }, + .fallback = false, +}; /* thread local */ + static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { - // stack_start - highest possible valid stack address (assumption: stacks grow downward) - // stack_end - current stack frame and lowest possible valid stack address - // (all earlier frames will be at higher addresses than this) + /* fp: current stack frame pointer + * + * stack_range: readable stack memory range for the current thread. 
+ * Used to validate frame addresses during stack unwinding. + * For most threads there is a single valid stack range + * that is fixed at thread creation time. This may not be + * the case when folly fibers or boost contexts are used. + * In those cases fall back to using execinfo backtrace() + * (DWARF unwind). + */ - // always safe to get the current stack frame address - void** stack_end = (void**)__builtin_frame_address(0); - if (stack_end == NULL) { - *len = 0; - return; - } + /* always safe to get the current stack frame address */ + uintptr_t fp = (uintptr_t)__builtin_frame_address(0); - static __thread void **stack_start = (void **)0; // thread local - if (stack_start == 0 || stack_end >= stack_start) { - stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end); - } + /* new thread - get the stack range */ + if (!unwind_info.fallback && + unwind_info.stack_range.start == unwind_info.stack_range.end) { + if (prof_thread_stack_range(fp, &unwind_info.stack_range.start, + &unwind_info.stack_range.end) != 0) { + unwind_info.fallback = true; + } else { + assert(fp >= unwind_info.stack_range.start + && fp < unwind_info.stack_range.end); + } + } - if (stack_start == 0 || stack_end >= stack_start) { - *len = 0; - return; - } + if (unwind_info.fallback) { + goto label_fallback; + } - unsigned ii = 0; - void** fp = (void**)stack_end; - while (fp < stack_start && ii < max_len) { - vec[ii++] = fp[1]; - void** fp_prev = fp; - fp = fp[0]; - if (unlikely(fp <= fp_prev)) { // sanity check forward progress - break; - } - } - *len = ii; + unsigned ii = 0; + while (ii < max_len && fp != 0) { + if (fp < unwind_info.stack_range.start || + fp >= unwind_info.stack_range.end) { + /* + * Determining the stack range from procfs can be + * relatively expensive especially for programs with + * many threads / shared libraries. If the stack + * range has changed, it is likely to change again + * in the future (fibers or some other stack + * manipulation). 
So fall back to backtrace for this + * thread. + */ + unwind_info.fallback = true; + goto label_fallback; + } + void* ip = ((void **)fp)[1]; + if (ip == 0) { + break; + } + vec[ii++] = ip; + fp = ((uintptr_t *)fp)[0]; + } + *len = ii; + return; + +label_fallback: + /* + * Using the backtrace from execinfo.h here. Note that it may get + * redirected to libunwind when a libunwind not built with build-time + * flag --disable-weak-backtrace is linked. + */ + assert(unwind_info.fallback); + int nframes = backtrace(vec, max_len); + if (nframes > 0) { + *len = nframes; + } else { + *len = 0; + } } + JEMALLOC_DIAGNOSTIC_POP #elif (defined(JEMALLOC_PROF_GCC)) JEMALLOC_DIAGNOSTIC_PUSH From 81f35e0b55c52cb0c3e1171afd477e1cb66fafaf Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 7 Mar 2025 11:08:03 -0800 Subject: [PATCH 268/395] Modify Travis tests to use frameptr when profiling --- .travis.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ scripts/gen_travis.py | 8 +++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a32755c6..387b36cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,6 +61,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -94,6 +97,9 @@ jobs: - os: linux arch: amd64 env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr 
--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" @@ -124,6 +130,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -151,6 +160,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -175,6 +187,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof 
--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -196,6 +211,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -214,6 +232,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -229,6 +250,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -241,6 +265,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -253,6 +280,18 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -298,6 +337,9 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index a49bb83b..6f8ee505 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -265,7 +265,13 @@ def generate_linux(arch): if arch != ARM64: exclude += [LARGE_HUGEPAGE] - return generate_jobs(os, arch, exclude, max_unusual_opts) + linux_configure_flags = list(configure_flag_unusuals) + linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr")) + + linux_unusuals = (compilers_unusual + feature_unusuals + + linux_configure_flags + malloc_conf_unusuals) + + return generate_jobs(os, arch, exclude, max_unusual_opts, linux_unusuals) def generate_macos(arch): From 86bbabac32775bdf414318e57e626febb9b6eac1 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 6 Mar 2025 22:24:27 -0700 Subject: [PATCH 269/395] background_thread: add fallback for pthread_create dlsym If jemalloc is linked into a shared library, the RTLD_NEXT dlsym call may fail since RTLD_NEXT is only specified to search all objects after the current one in the loading order, and the pthread library may be earlier in the load order. Instead of failing immediately, attempt one more time to find pthread_create via RTLD_GLOBAL. Errors cascading from this were observed on FreeBSD 14.1. 
--- src/background_thread.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/background_thread.c b/src/background_thread.c index 30c3111c..511febac 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -63,6 +63,9 @@ pthread_create_fptr_init(void) { */ #ifdef JEMALLOC_HAVE_DLSYM pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + pthread_create_fptr = dlsym(RTLD_DEFAULT, "pthread_create"); + } #else pthread_create_fptr = NULL; #endif From e1a77ec5583702429fbe7c42e7ad37dfd5517cce Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 5 Mar 2025 13:00:57 -0800 Subject: [PATCH 270/395] Support THP with Huge Arena in PAC --- include/jemalloc/internal/arena_externs.h | 3 + .../internal/jemalloc_internal_externs.h | 2 + include/jemalloc/internal/pac.h | 25 +++++++ src/arena.c | 19 +++++- src/base.c | 46 ++++++++++++- src/ctl.c | 3 + src/extent.c | 66 +++++++++++++++++-- src/jemalloc.c | 1 + src/stats.c | 1 + 9 files changed, 158 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e915c97a..8dd5b015 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -30,6 +30,9 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; +extern bool opt_huge_arena_pac_thp; +extern pac_thp_t huge_arena_pac_thp; + /* * arena_bin_offsets[binind] is the offset of the first bin shard for size class * binind. 
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8c6df450..83a37baf 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -62,6 +62,8 @@ extern unsigned manual_arena_base; */ extern atomic_p_t arenas[]; +extern unsigned huge_arena_ind; + void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 0b173a58..243e97f3 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -125,6 +125,31 @@ struct pac_s { atomic_zu_t extent_sn_next; }; +typedef struct pac_thp_s pac_thp_t; +struct pac_thp_s { + /* + * opt_thp controls THP for user requested allocations. Settings + * "always", "never" and "default" are available if THP is supported + * by the OS and the default extent hooks are used: + * - "always" and "never" are convered by pages_set_thp_state() in + * ehooks_default_alloc_impl(). + * - "default" makes no change for all the other auto arenas except + * the huge arena. For the huge arena, we might also look at + * opt_metadata_thp to decide whether to use THP or not. + * This is a temporary remedy before HPA is fully supported. + */ + bool thp_madvise; + /* Below fields are protected by the lock. */ + malloc_mutex_t lock; + bool auto_thp_switched; + atomic_u_t n_thp_lazy; + /* + * List that tracks HUGEPAGE aligned regions that're lazily hugified + * in auto thp mode. 
+ */ + edata_list_active_t thp_lazy_list; +}; + bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, diff --git a/src/arena.c b/src/arena.c index 54ecc403..84d4e14c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -46,7 +46,15 @@ size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; uint32_t arena_bin_offsets[SC_NBINS]; -static unsigned huge_arena_ind; +/* + * a0 is used to handle huge requests before malloc init completes. After + * that,the huge_arena_ind is updated to point to the actual huge arena, + * which is the last one of the auto arenas. + */ +unsigned huge_arena_ind = 0; +bool opt_huge_arena_pac_thp = false; +pac_thp_t huge_arena_pac_thp = {.thp_madvise = false, + .auto_thp_switched = false, .n_thp_lazy = ATOMIC_INIT(0)}; const arena_config_t arena_config_default = { /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, @@ -1898,6 +1906,7 @@ arena_choose_huge(tsd_t *tsd) { bool arena_init_huge(arena_t *a0) { bool huge_enabled; + assert(huge_arena_ind == 0); /* The threshold should be large size class. */ if (opt_oversize_threshold > SC_LARGE_MAXCLASS || @@ -1908,10 +1917,18 @@ arena_init_huge(arena_t *a0) { } else { /* Reserve the index for the huge arena. */ huge_arena_ind = narenas_total_get(); + assert(huge_arena_ind != 0); oversize_threshold = opt_oversize_threshold; /* a0 init happened before malloc_conf_init. */ atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, oversize_threshold, ATOMIC_RELAXED); + /* Initialize huge arena THP settings for PAC. 
*/ + (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && + metadata_thp_enabled() && (opt_thp == thp_mode_default) && + (init_system_thp_mode == thp_mode_default); + malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", + WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); + edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); huge_enabled = true; } diff --git a/src/base.c b/src/base.c index ac8598eb..13367697 100644 --- a/src/base.c +++ b/src/base.c @@ -55,9 +55,6 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { } if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); - if (have_madvise_huge && addr) { - pages_set_thp_state(addr, size); - } } else { addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, &commit); @@ -153,6 +150,40 @@ base_get_num_blocks(base_t *base, bool with_new_block) { return n_blocks; } +static void +huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { + assert(opt_huge_arena_pac_thp); + assert(!pac_thp->auto_thp_switched); + + arena_t *huge_arena; + if (huge_arena_ind == 0 || (huge_arena = arena_get(tsdn, huge_arena_ind, + false)) == NULL) { + /* Huge arena hasn't been init yet, simply turn the switch on. 
*/ + pac_thp->auto_thp_switched = true; + return; + } + + assert(huge_arena != NULL); + edata_list_active_t *pending_list; + malloc_mutex_lock(tsdn, &pac_thp->lock); + pending_list = &pac_thp->thp_lazy_list; + pac_thp->auto_thp_switched = true; + malloc_mutex_unlock(tsdn, &pac_thp->lock); + + unsigned cnt = 0; + edata_t *edata; + ql_foreach(edata, &pending_list->head, ql_link_active) { + assert(edata != NULL); + void *addr = edata_addr_get(edata); + size_t size = edata_size_get(edata); + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size && size != 0); + pages_huge(addr, size); + cnt++; + } + assert(cnt == atomic_load_u(&pac_thp->n_thp_lazy, ATOMIC_RELAXED)); +} + static void base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); @@ -187,6 +218,15 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); } + + /* Handle the THP auto switch for the huge arena. */ + if (!huge_arena_pac_thp.thp_madvise || base_ind_get(base) != 0) { + /* Only b0 metadata auto thp switch do the trigger. 
*/ + return; + } + malloc_mutex_unlock(tsdn, &base->mtx); + huge_arena_auto_thp_switch(tsdn, &huge_arena_pac_thp); + malloc_mutex_lock(tsdn, &base->mtx); } static void * diff --git a/src/ctl.c b/src/ctl.c index 2c941ae8..1d7eace6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,6 +113,7 @@ CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_bytes_after_flush) CTL_PROTO(opt_hpa_sec_batch_fill_extra) +CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -498,6 +499,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_sec_bytes_after_flush)}, {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, + {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -2277,6 +2279,7 @@ CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) +CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/extent.c b/src/extent.c index e61b7f9c..86b30f82 100644 --- a/src/extent.c +++ b/src/extent.c @@ -646,6 +646,55 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, return edata; } +static void +extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, + edata_cache_t *edata_cache, void *addr, size_t size) { + assert(opt_huge_arena_pac_thp); + assert(opt_metadata_thp != metadata_thp_disabled); + /* + * With rounding up the given memory region [addr, addr + size) to + * the huge page region that it crosses boundaries with, + * essentially we're aligning the start addr down and the end addr + * up to the nearest HUGEPAGE boundaries. 
The memory overhead can + * be within the range of [0, 2 * (HUGEPAGE - 1)]. + */ + void *huge_addr = HUGEPAGE_ADDR2BASE(addr); + void *huge_end = HUGEPAGE_ADDR2BASE((void *)((byte_t *)addr + + (uintptr_t)(size + HUGEPAGE - 1))); + assert((uintptr_t)huge_end > (uintptr_t)huge_addr); + + size_t huge_size = (uintptr_t)huge_end - (uintptr_t)huge_addr; + assert(huge_size <= (size + ((HUGEPAGE - 1) << 1)) && + huge_size >= size); + + if (opt_metadata_thp == metadata_thp_always || + pac_thp->auto_thp_switched) { + pages_huge(huge_addr, huge_size); + } else { + assert(opt_metadata_thp == metadata_thp_auto); + edata_t *edata = edata_cache_get(tsdn, edata_cache); + + malloc_mutex_lock(tsdn, &pac_thp->lock); + /* Can happen if the switch is turned on during edata retrieval. */ + if (pac_thp->auto_thp_switched) { + malloc_mutex_unlock(tsdn, &pac_thp->lock); + pages_huge(huge_addr, huge_size); + if (edata != NULL) { + edata_cache_put(tsdn, edata_cache, edata); + } + } else { + if (edata != NULL) { + edata_addr_set(edata, huge_addr); + edata_size_set(edata, huge_size); + edata_list_active_append(&pac_thp->thp_lazy_list, edata); + atomic_fetch_add_u(&pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); + } + malloc_mutex_unlock(tsdn, &pac_thp->lock); + } + malloc_mutex_assert_not_owner(tsdn, &pac_thp->lock); + } +} + /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -688,10 +737,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_err; } - edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, - alloc_size, false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zeroed, committed, EXTENT_PAI_PAC, - EXTENT_IS_HEAD); + unsigned ind = ecache_ind_get(&pac->ecache_retained); + edata_init(edata, ind, ptr, alloc_size, false, SC_NSIZES, + extent_sn_next(pac), extent_state_active, zeroed, committed, + EXTENT_PAI_PAC, EXTENT_IS_HEAD); if 
(extent_register_no_gdump_add(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -767,6 +816,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, exp_grow_size_commit(&pac->exp_grow, exp_grow_skip); malloc_mutex_unlock(tsdn, &pac->grow_mtx); + if (huge_arena_pac_thp.thp_madvise) { + /* Avoid using HUGEPAGE when the grow size is less than HUGEPAGE. */ + if (ind != 0 && ind == huge_arena_ind && ehooks_are_default(ehooks) && + likely(alloc_size >= HUGEPAGE)) { + extent_handle_huge_arena_thp(tsdn, &huge_arena_pac_thp, + pac->edata_cache, ptr, alloc_size); + } + } + if (config_prof) { /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); diff --git a/src/jemalloc.c b/src/jemalloc.c index d08771f8..4939d954 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1240,6 +1240,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL(opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { int m; bool match = false; diff --git a/src/stats.c b/src/stats.c index bd0167fb..6e77977f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1681,6 +1681,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") + OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") From a4defdb85434c2027c45c956f4d6d333997a1b50 Mon Sep 17 00:00:00 2001 From: Jay Lee Date: Fri, 7 Feb 2025 14:12:38 +0800 Subject: [PATCH 271/395] detect false failure of strerror_r See tikv/jemallocator#108. 
In a summary, test on `strerror_r` can fail due to reasons other than `strerror_r` itself, so add an additional test to determine the failure is expected. Signed-off-by: Jay Lee --- configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/configure.ac b/configure.ac index eb500db9..1c9c5067 100644 --- a/configure.ac +++ b/configure.ac @@ -2835,9 +2835,19 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [ char *error = strerror_r(EINVAL, buffer, 100); printf("%s\n", error); ], [je_cv_strerror_r_returns_char_with_gnu_source]) +if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xno" ; then + JE_COMPILABLE([strerror_r header only], [ +#include +#include +#include +#include +], [], [je_cv_strerror_r_header_pass]) +fi JE_CFLAGS_RESTORE() if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ]) +elif test "x${je_cv_strerror_r_header_pass}" = "xno" ; then + AC_MSG_ERROR([cannot determine return type of strerror_r]) fi dnl ============================================================================ From 3688dfb5c3b7d94a12e18b753c0fc9c405b77b1f Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 19 Mar 2025 17:26:33 -0700 Subject: [PATCH 272/395] fix assertion error in huge_arena_auto_thp_switch() when b0 is deleted in unit test --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 9 +++++-- src/base.c | 30 ++++++++++++++++------- src/jemalloc.c | 6 ++--- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8dd5b015..91fed258 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -106,7 +106,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t 
*arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(arena_t *a0); +bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); diff --git a/src/arena.c b/src/arena.c index 84d4e14c..0a0c97ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1904,7 +1904,7 @@ arena_choose_huge(tsd_t *tsd) { } bool -arena_init_huge(arena_t *a0) { +arena_init_huge(tsdn_t *tsdn, arena_t *a0) { bool huge_enabled; assert(huge_arena_ind == 0); @@ -1922,13 +1922,18 @@ arena_init_huge(arena_t *a0) { /* a0 init happened before malloc_conf_init. */ atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, oversize_threshold, ATOMIC_RELAXED); - /* Initialize huge arena THP settings for PAC. */ + /* Initialize huge_arena_pac_thp fields. */ + base_t *b0 = a0->base; + /* Make sure that b0 thp auto-switch won't happen concurrently here. */ + malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && metadata_thp_enabled() && (opt_thp == thp_mode_default) && (init_system_thp_mode == thp_mode_default); + (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); + malloc_mutex_unlock(tsdn, &b0->mtx); huge_enabled = true; } diff --git a/src/base.c b/src/base.c index 13367697..52f3d1d3 100644 --- a/src/base.c +++ b/src/base.c @@ -153,17 +153,19 @@ base_get_num_blocks(base_t *base, bool with_new_block) { static void huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { assert(opt_huge_arena_pac_thp); - assert(!pac_thp->auto_thp_switched); - - arena_t *huge_arena; - if (huge_arena_ind == 0 || (huge_arena = arena_get(tsdn, huge_arena_ind, - false)) == NULL) { - /* Huge arena hasn't been init yet, simply turn the switch on. 
*/ - pac_thp->auto_thp_switched = true; +#ifdef JEMALLOC_JET + if (pac_thp->auto_thp_switched) { return; } +#else + /* + * The switch should be turned on only once when the b0 auto thp switch is + * turned on, unless it's a unit test where b0 gets deleted and then + * recreated. + */ + assert(!pac_thp->auto_thp_switched); +#endif - assert(huge_arena != NULL); edata_list_active_t *pending_list; malloc_mutex_lock(tsdn, &pac_thp->lock); pending_list = &pac_thp->thp_lazy_list; @@ -221,9 +223,19 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { /* Handle the THP auto switch for the huge arena. */ if (!huge_arena_pac_thp.thp_madvise || base_ind_get(base) != 0) { - /* Only b0 metadata auto thp switch do the trigger. */ + /* + * The huge arena THP auto-switch is triggered only by b0 switch, + * provided that the huge arena is initialized. If b0 switch is enabled + * before huge arena is ready, the huge arena switch will be enabled + * during huge_arena_pac_thp initialization. + */ return; } + /* + * thp_madvise above is by default false and set in arena_init_huge() with + * b0 mtx held. So if we reach here, it means the entire huge_arena_pac_thp + * is initialized and we can safely switch the THP. 
+ */ malloc_mutex_unlock(tsdn, &base->mtx); huge_arena_auto_thp_switch(tsdn, &huge_arena_pac_thp); malloc_mutex_lock(tsdn, &base->mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index 4939d954..d7b46d6c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2097,7 +2097,7 @@ percpu_arena_as_initialized(percpu_arena_mode_t mode) { } static bool -malloc_init_narenas(void) { +malloc_init_narenas(tsdn_t *tsdn) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { @@ -2164,7 +2164,7 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); - if (arena_init_huge(a0)) { + if (arena_init_huge(tsdn, a0)) { narenas_total_inc(); } manual_arena_base = narenas_total_get(); @@ -2248,7 +2248,7 @@ malloc_init_hard(void) { /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ - if (malloc_init_narenas() + if (malloc_init_narenas(tsd_tsdn(tsd)) || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } From 80e9001af33558c4ea991fcf5a715f3a7942a40e Mon Sep 17 00:00:00 2001 From: "Kaspar M. Rohrer" Date: Sat, 29 Mar 2025 23:51:20 +0100 Subject: [PATCH 273/395] Move `extern "C" specifications for C++ to where they are needed This should fix errors when compiling C++ code with modules enabled on clang. 
--- include/jemalloc/jemalloc.sh | 6 ------ include/jemalloc/jemalloc_protos.h.in | 8 ++++++++ include/jemalloc/jemalloc_typedefs.h.in | 8 ++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index dacd6195..9eaca266 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -6,9 +6,6 @@ cat < Date: Wed, 12 Mar 2025 12:26:52 -0700 Subject: [PATCH 274/395] if process_madvise is supported, call it when purging hpa --- Makefile.in | 1 + include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 53 +++++- src/hpa_hooks.c | 16 +- test/unit/hpa.c | 58 +++++- test/unit/hpa_vectorized_madvise.c | 258 ++++++++++++++++++++++++++ test/unit/hpa_vectorized_madvise.sh | 3 + 7 files changed, 387 insertions(+), 3 deletions(-) create mode 100644 test/unit/hpa_vectorized_madvise.c create mode 100644 test/unit/hpa_vectorized_madvise.sh diff --git a/Makefile.in b/Makefile.in index b4102d0b..ee3399ec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -231,6 +231,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index b04b04f6..d0618f89 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -13,6 +13,7 @@ struct hpa_hooks_s { void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); + bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes); }; extern const hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index c01dde13..adb106cc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -8,6 +8,16 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) +#define HPA_MIN_VAR_VEC_SIZE 8 
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +typedef struct iovec hpa_io_vector_t; +#else +typedef struct { + void *iov_base; + size_t iov_len; +} hpa_io_vector_t; +#endif + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -422,6 +432,24 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } +/* If we fail vectorized purge, we will do single */ +static void +hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec, + size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge(vec[i].iov_base, + vec[i].iov_len); + } + } +} + /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -470,14 +498,37 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } size_t total_purged = 0; uint64_t purges_this_pass = 0; + + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + size_t len = opt_process_madvise_max_batch == 0 ? 
+ HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + void *purge_addr; size_t purge_size; + size_t cur = 0; + size_t total_batch_bytes = 0; while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, &purge_size)) { + vec[cur].iov_base = purge_addr; + vec[cur].iov_len = purge_size; total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - shard->central->hooks.purge(purge_addr, purge_size); + total_batch_bytes += purge_size; + cur++; + if (cur == len) { + hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes); + assert(total_batch_bytes > 0); + cur = 0; + total_batch_bytes = 0; + } + } + + /* Batch was not full */ + if (cur > 0) { + hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes); } malloc_mutex_lock(tsdn, &shard->mtx); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 4628c14f..072d490e 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -10,6 +10,8 @@ static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); +static bool hpa_hooks_vectorized_purge( + void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -18,7 +20,8 @@ const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_hugify, &hpa_hooks_dehugify, &hpa_hooks_curtime, - &hpa_hooks_ms_since + &hpa_hooks_ms_since, + &hpa_hooks_vectorized_purge }; static void * @@ -78,3 +81,14 @@ static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime) { return nstime_ms_since(past_nstime); } + + +/* Return true if we did not purge all nbytes, or on some error */ +static bool +hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + return pages_purge_process_madvise(vec, vlen, nbytes); +#else + return true; +#endif +} diff --git a/test/unit/hpa.c 
b/test/unit/hpa.c index ceed9bd8..e53ee2ec 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -357,6 +357,16 @@ defer_test_purge(void *ptr, size_t size) { ++ndefer_purge_calls; } +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + static size_t ndefer_hugify_calls = 0; static bool defer_test_hugify(void *ptr, size_t size, bool sync) { @@ -392,6 +402,7 @@ TEST_BEGIN(test_defer_time) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -506,6 +517,7 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -548,6 +560,7 @@ TEST_BEGIN(test_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -598,6 +611,7 @@ TEST_BEGIN(test_purge) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -664,6 +678,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t 
opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -732,6 +747,7 @@ TEST_BEGIN(test_demand_purge_slack) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -799,6 +815,7 @@ TEST_BEGIN(test_demand_purge_tight) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -855,6 +872,44 @@ TEST_BEGIN(test_demand_purge_tight) { } TEST_END +TEST_BEGIN(test_vectorized_opt_eq_zero) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + defer_vectorized_purge_called = false; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_vectorized_purge_called, "No vec purge"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + + destroy_test_data(shard); +} 
+TEST_END + int main(void) { /* @@ -880,5 +935,6 @@ main(void) { test_purge, test_experimental_max_purge_nhp, test_demand_purge_slack, - test_demand_purge_tight); + test_demand_purge_tight, + test_vectorized_opt_eq_zero); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c new file mode 100644 index 00000000..130dc699 --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.c @@ -0,0 +1,258 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1, + /* peak_demand_window_ms */ + 0 +}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = 
hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + ++ndefer_purge_calls; +} + +static size_t ndefer_vec_purge_calls = 0; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_vec_purge_calls; + return false; +} + +static bool defer_vec_purge_didfail = false; +static bool defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)vlen; + (void)nbytes; + defer_vec_purge_didfail = true; + return true; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_vectorized_failure_fallback) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = 
&defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge_fail; + defer_vec_purge_didfail = false; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_true(defer_vec_purge_didfail, "Expect vec purge fail"); + expect_zu_eq(1, ndefer_purge_calls, "Expect non-vec purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_more_regions_purged_from_one_page) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0) || + HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for 
(int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate almost 3 pages out of 8, and to force batching + * leave the 2nd and 4th PAGE in the first 3 hugepages. + */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + int j = i % HUGEPAGE_PAGES; + if (j != 1 && j != 3) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We purge from 2 huge pages, each one 3 segments. That's 6 non + * vectorized calls, or 2 <= vc <=6 vectorized calls + * (depending on batch size). + */ + size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch); + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_vectorized_failure_fallback, + test_more_regions_purged_from_one_page); +} diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh new file mode 100644 index 00000000..c5d66afa --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:2" From f81fb92a8984b767dae10dc54ef48d1d50e6e1de Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Apr 2025 14:15:14 -0700 Subject: [PATCH 275/395] Remove Travis CI macOS configs (not supported anymore). 
--- .travis.yml | 27 --------------------------- scripts/gen_travis.py | 3 ++- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/.travis.yml b/.travis.yml index 387b36cf..5a83d757 100644 --- a/.travis.yml +++ b/.travis.yml @@ -352,33 +352,6 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 6f8ee505..fa98f2a2 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -347,7 +347,8 @@ def main(): # generate_linux(PPC64LE), generate_linux(ARM64), - generate_macos(AMD64), + # Starting April 1st, 2025, Travis no longer supports OSx/macOS builds + # generate_macos(AMD64), get_manual_jobs(), )) From c20a63a765dcd22f6b91676ab03507dd9d7b3e2d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 10 Apr 2025 15:07:20 -0700 Subject: [PATCH 276/395] Silence the uninitialized warning from clang. 
--- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/util.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index c7ef9161..39c196a5 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -496,7 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { assert(tsd_fast(tsd) || *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); size_t usize; if (!size_hint) { bool err = emap_alloc_ctx_try_lookup_fast(tsd, diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 6646386e..35aa26e6 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -29,7 +29,7 @@ * wherever the compiler fails to recognize that the variable is never used * uninitialized. */ -#define JEMALLOC_CC_SILENCE_INIT(v) = v +#define JEMALLOC_CC_SILENCE_INIT(...) = __VA_ARGS__ #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) From c23a6bfdf6eed78dbe9c2b39a3798d091843a957 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 10 Apr 2025 15:12:52 -0700 Subject: [PATCH 277/395] Add opt.limit_usize_gap to stats --- src/stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stats.c b/src/stats.c index 6e77977f..db9b9f43 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1730,6 +1730,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") OPT_WRITE_SIZE_T("process_madvise_max_batch") + OPT_WRITE_BOOL("limit_usize_gap") emitter_dict_end(emitter); /* Close "opt". 
*/ From a3910b9802d066a72707d9d77bc981d05b74d761 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Apr 2025 20:21:53 -0700 Subject: [PATCH 278/395] Avoid forced purging during thread-arena migration when bg thd is on. --- src/jemalloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d7b46d6c..9451df77 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -494,8 +494,12 @@ arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); - if (arena_nthreads_get(oldarena, false) == 0) { - /* Purge if the old arena has no associated threads anymore. */ + if (arena_nthreads_get(oldarena, false) == 0 && + !background_thread_enabled()) { + /* + * Purge if the old arena has no associated threads anymore and + * no background threads. + */ arena_decay(tsd_tsdn(tsd), oldarena, /* is_background_thread */ false, /* all */ true); } From cfa90dfd80c4b3ca2b2678fb55cfc718bd9f42c6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 8 Apr 2025 09:51:53 -0700 Subject: [PATCH 279/395] Refactor hpa purging to prepare for vectorized call across multiple pages --- include/jemalloc/internal/hpa_utils.h | 82 +++++++++++++++++++++++++++ src/hpa.c | 63 +++++--------------- 2 files changed, 97 insertions(+), 48 deletions(-) create mode 100644 include/jemalloc/internal/hpa_utils.h diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h new file mode 100644 index 00000000..035d3b21 --- /dev/null +++ b/include/jemalloc/internal/hpa_utils.h @@ -0,0 +1,82 @@ +#ifndef JEMALLOC_INTERNAL_HPA_UTILS_H +#define JEMALLOC_INTERNAL_HPA_UTILS_H + +#include "jemalloc/internal/hpa.h" + +#define HPA_MIN_VAR_VEC_SIZE 8 +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +typedef struct iovec hpa_io_vector_t; +#else +typedef struct { + void *iov_base; + size_t iov_len; +} hpa_io_vector_t; +#endif + +/* Actually invoke hooks. 
If we fail vectorized, use single purges */ +static void +hpa_try_vectorized_purge( + hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge(vec[i].iov_base, vec[i].iov_len); + } + } +} + +/* + * This struct accumulates the regions for process_madvise. + * It invokes the hook when batch limit is reached + */ +typedef struct { + hpa_io_vector_t *vp; + size_t cur; + size_t total_bytes; + size_t capacity; +} hpa_range_accum_t; + +static inline void +hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { + ra->vp = v; + ra->capacity = sz; + ra->total_bytes = 0; + ra->cur = 0; +} + +static inline void +hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { + assert(ra->total_bytes > 0 && ra->cur > 0); + hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + ra->cur = 0; + ra->total_bytes = 0; +} + +static inline void +hpa_range_accum_add( + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + assert(ra->cur < ra->capacity); + + ra->vp[ra->cur].iov_base = addr; + ra->vp[ra->cur].iov_len = sz; + ra->total_bytes += sz; + ra->cur++; + + if (ra->cur == ra->capacity) { + hpa_range_accum_flush(ra, shard); + } +} + +static inline void +hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { + if (ra->cur > 0) { + hpa_range_accum_flush(ra, shard); + } +} + +#endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/src/hpa.c b/src/hpa.c index adb106cc..c6771352 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -2,22 +2,13 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa.h" +#include 
"jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) -#define HPA_MIN_VAR_VEC_SIZE 8 -#ifdef JEMALLOC_HAVE_PROCESS_MADVISE -typedef struct iovec hpa_io_vector_t; -#else -typedef struct { - void *iov_base; - size_t iov_len; -} hpa_io_vector_t; -#endif - static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -432,22 +423,12 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } -/* If we fail vectorized purge, we will do single */ -static void -hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec, - size_t vlen, size_t nbytes) { - bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); - if (!success) { - /* On failure, it is safe to purge again (potential perf - * penalty) If kernel can tell exactly which regions - * failed, we could avoid that penalty. - */ - for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge(vec[i].iov_base, - vec[i].iov_len); - } - } +static inline size_t +hpa_process_madvise_max_iovec_len(void) { + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 ? + HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; } /* Returns whether or not we purged anything. */ @@ -498,38 +479,24 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } size_t total_purged = 0; uint64_t purges_this_pass = 0; - - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); - size_t len = opt_process_madvise_max_batch == 0 ? 
- HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + + size_t len = hpa_process_madvise_max_iovec_len(); VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + void *purge_addr; size_t purge_size; - size_t cur = 0; - size_t total_batch_bytes = 0; while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, &purge_size)) { - vec[cur].iov_base = purge_addr; - vec[cur].iov_len = purge_size; total_purged += purge_size; assert(total_purged <= HUGEPAGE); + hpa_range_accum_add(&accum, purge_addr, purge_size, shard); purges_this_pass++; - total_batch_bytes += purge_size; - cur++; - if (cur == len) { - hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes); - assert(total_batch_bytes > 0); - cur = 0; - total_batch_bytes = 0; - } - } - - /* Batch was not full */ - if (cur > 0) { - hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes); } + /* If batch was not full, finish */ + hpa_range_accum_finish(&accum, shard); malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ From 0dfb4a5a1a83f0968f8499c101dc98586a582546 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 8 Apr 2025 10:49:05 -0700 Subject: [PATCH 280/395] Add output argument to hpa_purge_begin to count dirty ranges --- include/jemalloc/internal/hpdata.h | 6 ++++-- src/hpa.c | 4 +++- src/hpdata.c | 7 ++++++- test/unit/hpdata.c | 15 +++++++++++---- test/unit/psset.c | 4 +++- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index a8a845ec..a8a4a552 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -395,9 +395,11 @@ struct hpdata_purge_state_s { * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. * - * Returns the number of dirty pages that will be purged. 
+ * Returns the number of dirty pages that will be purged and sets nranges + * to number of ranges with dirty pages that will be purged. */ -size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + size_t *nranges); /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to diff --git a/src/hpa.c b/src/hpa.c index c6771352..afcfbe7f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -465,8 +465,10 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Gather all the metadata we'll need during the purge. */ bool dehugify = hpdata_huge_get(to_purge); + size_t nranges; hpdata_purge_state_t purge_state; - size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state); + size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state, &nranges); + (void) nranges; /*not used yet */ shard->npending_purge += num_to_purge; diff --git a/src/hpdata.c b/src/hpdata.c index 3058eafe..f3e347c4 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -164,7 +164,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } size_t -hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { +hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + size_t *nranges) { hpdata_assert_consistent(hpdata); /* * See the comment below; we might purge any inactive extent, so it's @@ -216,6 +217,7 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_init(purge_state->to_purge, HUGEPAGE_PAGES); size_t next_bit = 0; + *nranges = 0; while (next_bit < HUGEPAGE_PAGES) { size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, next_bit); @@ -239,6 +241,7 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty, last_dirty - next_dirty + 1); + (*nranges)++; next_bit = next_active + 1; } @@ -249,6 +252,8 @@ hpdata_purge_begin(hpdata_t *hpdata, 
hpdata_purge_state_t *purge_state) { purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(*nranges <= ndirty); + assert(ndirty == 0 || *nranges > 0); hpdata_assert_consistent(hpdata); diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 288e71d4..995ab77b 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -69,8 +69,10 @@ TEST_BEGIN(test_purge_simple) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); + expect_zu_eq(1, nranges, "All dirty pages in a single range"); void *purge_addr; size_t purge_size; @@ -113,8 +115,10 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); + expect_zu_eq(2, nranges, "First quarter and last half"); void *purge_addr; size_t purge_size; @@ -171,8 +175,10 @@ TEST_BEGIN(test_purge_over_retained) { /* Purge the second quarter. */ hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); + expect_zu_eq(1, nranges, "Second quarter only"); bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); @@ -199,8 +205,9 @@ TEST_BEGIN(test_purge_over_retained) { * re-purge it. We expect a single purge of 3/4 of the hugepage, * purging half its pages. 
*/ - to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); + expect_zu_eq(1, nranges, "Single range expected"); got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); diff --git a/test/unit/psset.c b/test/unit/psset.c index c400f3b9..b15d9af3 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -19,7 +19,9 @@ static void test_psset_fake_purge(hpdata_t *ps) { hpdata_purge_state_t purge_state; hpdata_alloc_allowed_set(ps, false); - hpdata_purge_begin(ps, &purge_state); + size_t nranges; + hpdata_purge_begin(ps, &purge_state, &nranges); + (void) nranges; void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { From 1956a54a434ec365fad22d7497d86495b0c31883 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Sat, 5 Apr 2025 12:14:14 -0700 Subject: [PATCH 281/395] [process_madvise] Use process_madvise across multiple huge_pages --- Makefile.in | 1 + include/jemalloc/internal/hpa_utils.h | 34 +++ src/extent.c | 1 + src/hpa.c | 240 ++++++++++++------ test/unit/hpa_vectorized_madvise.c | 82 +++++- .../unit/hpa_vectorized_madvise_large_batch.c | 199 +++++++++++++++ .../hpa_vectorized_madvise_large_batch.sh | 3 + 7 files changed, 482 insertions(+), 78 deletions(-) create mode 100644 test/unit/hpa_vectorized_madvise_large_batch.c create mode 100644 test/unit/hpa_vectorized_madvise_large_batch.sh diff --git a/Makefile.in b/Makefile.in index ee3399ec..ac8c51ff 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,6 +232,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ + $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/hpa_utils.h 
b/include/jemalloc/internal/hpa_utils.h index 035d3b21..283510b9 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -79,4 +79,38 @@ hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { } } +/* + * For purging more than one page we use batch of these items + */ +typedef struct { + hpdata_purge_state_t state; + hpdata_t *hp; + bool dehugify; +} hpa_purge_item_t; + +typedef struct hpa_purge_batch_s hpa_purge_batch_t; +struct hpa_purge_batch_s { + hpa_purge_item_t *items; + size_t items_capacity; + /* Number of huge pages to purge in current batch */ + size_t item_cnt; + /* Number of ranges to purge in current batch */ + size_t nranges; + /* Total number of dirty pages in current batch*/ + size_t ndirty_in_batch; + + /* Max number of huge pages to purge */ + size_t max_hp; + /* + * Once we are above this watermark we should not add more pages + * to the same batch. This is because while we want to minimize + * number of madvise calls we also do not want to be preventing + * allocations from too many huge pages (which we have to do + * while they are being purged) + */ + size_t range_watermark; + + size_t npurged_hp_total; +}; + #endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/src/extent.c b/src/extent.c index 86b30f82..3425e1ce 100644 --- a/src/extent.c +++ b/src/extent.c @@ -12,6 +12,7 @@ /* Data. */ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; +/* This option is intended for kernel tuning, not app tuning. */ size_t opt_process_madvise_max_batch = #ifdef JEMALLOC_HAVE_PROCESS_MADVISE PROCESS_MADVISE_MAX_BATCH_DEFAULT; diff --git a/src/hpa.c b/src/hpa.c index afcfbe7f..50614e42 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -423,6 +423,31 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } +/* + * This is used for jemalloc internal tuning and may change in the + * future based on production traffic. 
+ * + * This value protects two things: + * 1. Stack size + * 2. Number of huge pages that are being purged in a batch as + * we do not allow allocations while making *madvise + * syscall. + */ +#define HPA_PURGE_BATCH_MAX_DEFAULT 16 + +#ifndef JEMALLOC_JET +#define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT +#else +size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; +size_t +hpa_purge_max_batch_size_for_test_set(size_t new_size) { + size_t old_size = hpa_purge_max_batch_size_for_test; + hpa_purge_max_batch_size_for_test = new_size; + return old_size; +} +#define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test +#endif + static inline size_t hpa_process_madvise_max_iovec_len(void) { assert(opt_process_madvise_max_batch <= @@ -431,14 +456,48 @@ hpa_process_madvise_max_iovec_len(void) { HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; } -/* Returns whether or not we purged anything. */ -static bool -hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); +static inline void +hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, + size_t batch_sz) { + assert(batch_sz > 0); - hpdata_t *to_purge = psset_pick_purge(&shard->psset); + size_t len = hpa_process_madvise_max_iovec_len(); + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + + for (size_t i = 0; i < batch_sz; ++i) { + hpdata_t *to_purge = batch[i].hp; + + /* Actually do the purging, now that the lock is dropped. 
*/ + if (batch[i].dehugify) { + shard->central->hooks.dehugify(hpdata_addr_get(to_purge), + HUGEPAGE); + } + void *purge_addr; + size_t purge_size; + size_t total_purged_on_one_hp = 0; + while (hpdata_purge_next( + to_purge, &batch[i].state, &purge_addr, &purge_size)) { + total_purged_on_one_hp += purge_size; + assert(total_purged_on_one_hp <= HUGEPAGE); + hpa_range_accum_add(&accum, purge_addr, purge_size, shard); + } + } + hpa_range_accum_finish(&accum, shard); +} + +/* Prepare purge of one page. Return num of dirty regular pages on it + * Return 0 if no purgable huge page is found + * + * If there was a page to purge its purge state is initialized + */ +static inline size_t +hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { + hpdata_t *to_purge = psset_pick_purge(psset); if (to_purge == NULL) { - return false; + return 0; } assert(hpdata_purge_allowed_get(to_purge)); assert(!hpdata_changing_state_get(to_purge)); @@ -448,7 +507,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * we're purging it (allocations and deallocations are * OK). */ - psset_update_begin(&shard->psset, to_purge); + psset_update_begin(psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); hpdata_purge_allowed_set(to_purge, false); @@ -461,70 +520,115 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * (clearing out user data). */ hpdata_alloc_allowed_set(to_purge, false); - psset_update_end(&shard->psset, to_purge); + psset_update_end(psset, to_purge); + assert(b->item_cnt < b->items_capacity); + hpa_purge_item_t *hp_item = &b->items[b->item_cnt]; + b->item_cnt++; + hp_item->hp = to_purge; /* Gather all the metadata we'll need during the purge. 
*/ - bool dehugify = hpdata_huge_get(to_purge); + hp_item->dehugify = hpdata_huge_get(hp_item->hp); size_t nranges; - hpdata_purge_state_t purge_state; - size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state, &nranges); - (void) nranges; /*not used yet */ + size_t ndirty = + hpdata_purge_begin(hp_item->hp, &hp_item->state, &nranges); + /* We picked hp to purge, so it should have some dirty ranges */ + assert(ndirty > 0 && nranges >0); + b->ndirty_in_batch += ndirty; + b->nranges += nranges; + return ndirty; +} - shard->npending_purge += num_to_purge; - - malloc_mutex_unlock(tsdn, &shard->mtx); - - /* Actually do the purging, now that the lock is dropped. */ - if (dehugify) { - shard->central->hooks.dehugify(hpdata_addr_get(to_purge), - HUGEPAGE); - } - size_t total_purged = 0; - uint64_t purges_this_pass = 0; - - size_t len = hpa_process_madvise_max_iovec_len(); - VARIABLE_ARRAY(hpa_io_vector_t, vec, len); - - hpa_range_accum_t accum; - hpa_range_accum_init(&accum, vec, len); - - void *purge_addr; - size_t purge_size; - while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, - &purge_size)) { - total_purged += purge_size; - assert(total_purged <= HUGEPAGE); - hpa_range_accum_add(&accum, purge_addr, purge_size, shard); - purges_this_pass++; - } - /* If batch was not full, finish */ - hpa_range_accum_finish(&accum, shard); - - malloc_mutex_lock(tsdn, &shard->mtx); - /* The shard updates */ - shard->npending_purge -= num_to_purge; - shard->stats.npurge_passes++; - shard->stats.npurges += purges_this_pass; - shard->central->hooks.curtime(&shard->last_purge, - /* first_reading */ false); - if (dehugify) { +/* Finish purge of one huge page. */ +static inline void +hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_purge_item_t *hp_item) { + if (hp_item->dehugify) { shard->stats.ndehugifies++; } - /* The hpdata updates. 
*/ - psset_update_begin(&shard->psset, to_purge); - if (dehugify) { - hpdata_dehugify(to_purge); + psset_update_begin(&shard->psset, hp_item->hp); + if (hp_item->dehugify) { + hpdata_dehugify(hp_item->hp); } - hpdata_purge_end(to_purge, &purge_state); - hpdata_mid_purge_set(to_purge, false); + hpdata_purge_end(hp_item->hp, &hp_item->state); + hpdata_mid_purge_set(hp_item->hp, false); - hpdata_alloc_allowed_set(to_purge, true); - hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge); + hpdata_alloc_allowed_set(hp_item->hp, true); + hpa_update_purge_hugify_eligibility(tsdn, shard, hp_item->hp); - psset_update_end(&shard->psset, to_purge); + psset_update_end(&shard->psset, hp_item->hp); +} - return true; +static inline bool +hpa_batch_full(hpa_purge_batch_t *b) { + /* It's okay for ranges to go above */ + return b->npurged_hp_total == b->max_hp || + b->item_cnt == b->items_capacity || + b->nranges >= b->range_watermark; +} + +static inline void +hpa_batch_pass_start(hpa_purge_batch_t *b) { + b->item_cnt = 0; + b->nranges = 0; + b->ndirty_in_batch = 0; +} + +static inline bool +hpa_batch_empty(hpa_purge_batch_t *b) { + return b->item_cnt == 0; +} + +/* Returns number of huge pages purged. 
*/
+static inline size_t
+hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	assert(max_hp > 0);
+
+	assert(HPA_PURGE_BATCH_MAX > 0);
+	assert(HPA_PURGE_BATCH_MAX <
+	    (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t)));
+	VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX);
+	hpa_purge_batch_t batch = {
+		.max_hp = max_hp,
+		.npurged_hp_total = 0,
+		.items = &items[0],
+		.items_capacity = HPA_PURGE_BATCH_MAX,
+		.range_watermark = hpa_process_madvise_max_iovec_len(),
+	};
+	assert(batch.range_watermark > 0);
+
+	while (1) {
+		hpa_batch_pass_start(&batch);
+		assert(hpa_batch_empty(&batch));
+		while(!hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) {
+			size_t ndirty = hpa_purge_start_hp(&batch, &shard->psset);
+			if (ndirty == 0) {
+				break;
+			}
+			shard->npending_purge += ndirty;
+			batch.npurged_hp_total++;
+		}
+
+		if (hpa_batch_empty(&batch)) {
+			break;
+		}
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt);
+		malloc_mutex_lock(tsdn, &shard->mtx);
+
+		/* The shard updates */
+		shard->npending_purge -= batch.ndirty_in_batch;
+		shard->stats.npurges += batch.ndirty_in_batch;
+		shard->central->hooks.curtime(&shard->last_purge,
+		    /* first_reading */ false);
+		for (size_t i=0; i<batch.item_cnt; i++) {
+			hpa_purge_finish_hp(tsdn, shard, &batch.items[i]);
+		}
+	}
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	shard->stats.npurge_passes++;
+	return batch.npurged_hp_total;
 }
 
 /* Returns whether or not we hugified anything. */
@@ -654,19 +758,9 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
 		max_purges = max_purge_nhp;
 	}
 
-	while (hpa_should_purge(tsdn, shard) && nops < max_purges) {
-		if (!hpa_try_purge(tsdn, shard)) {
-			/*
-			 * It is fine if we couldn't purge as sometimes
-			 * we try to purge just to unblock
-			 * hugification, but there is maybe no dirty
-			 * pages at all at the moment. 
-			 */
-			break;
-		}
-		malloc_mutex_assert_owner(tsdn, &shard->mtx);
-		nops++;
-	}
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	nops += hpa_purge(tsdn, shard, max_purges);
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 }
 
 /*
diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c
index 130dc699..ae25fdde 100644
--- a/test/unit/hpa_vectorized_madvise.c
+++ b/test/unit/hpa_vectorized_madvise.c
@@ -237,15 +237,86 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) {
 	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
 	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
 
-	/* We purge from 2 huge pages, each one 3 segments. That's 6 non
-	 * vectorized calls, or 2 <= vc <=6 vectorized calls
-	 * (depending on batch size).
+	/* We purge from 2 huge pages, each one 3 dirty continuous segments.
+	 * For opt_process_madvise_max_batch = 2, that is
+	 * 2 calls for first page, and 2 calls for second as we don't
+	 * want to hold the lock on the second page while vectorized batch
+	 * of size 2 is already filled with the first one. 
*/ - size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch); + expect_zu_eq(4, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +size_t +hpa_purge_max_batch_size_for_test_set(size_t new_size); +TEST_BEGIN(test_more_pages_than_batch_page_size) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0) || + HUGEPAGE_PAGES <= 4); + + size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We have page batch size = 1. 
+ * we have 5 * HP active pages, 3 * HP dirty pages + * To achieve the balance of 25% max dirty we need to + * purge 2 pages. Since batch is 1 that must be 2 calls + * no matter what opt_process_madvise_max_batch is + */ + size_t nexpected = 2; expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); ndefer_vec_purge_calls = 0; + hpa_purge_max_batch_size_for_test_set(old_page_batch); + destroy_test_data(shard); } TEST_END @@ -254,5 +325,6 @@ int main(void) { return test_no_reentrancy( test_vectorized_failure_fallback, - test_more_regions_purged_from_one_page); + test_more_regions_purged_from_one_page, + test_more_pages_than_batch_page_size); } diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c new file mode 100644 index 00000000..99ce15f4 --- /dev/null +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -0,0 +1,199 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1, + /* peak_demand_window_ms */ + 0 +}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + 
&ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + ++ndefer_purge_calls; +} + +static size_t ndefer_vec_purge_calls = 0; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_vec_purge_calls; + return false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t 
*past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_vectorized_purge) { + test_skip_if(!hpa_supported() || + opt_process_madvise_max_batch == 0 || HUGEPAGE_PAGES <= 4); + assert(opt_process_madvise_max_batch == 64); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate almost 3 hugepages out of 8, and to force batching + * leave the 2nd and 4th PAGE in the first 3 hugepages. + */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + int j = i % HUGEPAGE_PAGES; + if (j != 1 && j != 3) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * We purge from 2 huge pages, each one 3 dirty continous segments. 
+ * For opt_process_madvise_max_batch = 64, that is all just one call + */ + expect_zu_eq(1, ndefer_vec_purge_calls, "Expect single purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_vectorized_purge); +} diff --git a/test/unit/hpa_vectorized_madvise_large_batch.sh b/test/unit/hpa_vectorized_madvise_large_batch.sh new file mode 100644 index 00000000..f996047f --- /dev/null +++ b/test/unit/hpa_vectorized_madvise_large_batch.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:64" From 852da1be150e9811a3f0ab91302c5d6e9ee62e4f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 25 Apr 2025 18:26:49 -0700 Subject: [PATCH 282/395] Add experimental option force using SYS_process_madvise --- configure.ac | 14 ++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 ++ src/pages.c | 8 +++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1c9c5067..f731e8b4 100644 --- a/configure.ac +++ b/configure.ac @@ -2457,6 +2457,13 @@ if test "x${je_cv_osatomic}" = "xyes" ; then fi dnl ============================================================================ + +AC_ARG_WITH([experimental_sys_process_madvise], + [AS_HELP_STRING([--with-experimental-sys-process-madvise=], + [Force process_madvise and use experimental-sys-process-madvise number when making syscall])], + [je_cv_sys_pmadv_nr="${with_experimental_sys_process_madvise}"], + [je_cv_sys_pmadv_nr=""]) + dnl Check for madvise(2). 
JE_COMPILABLE([madvise(2)], [ @@ -2554,6 +2561,13 @@ if test "x${je_cv_madvise}" = "xyes" ; then ], [je_cv_process_madvise]) if test "x${je_cv_process_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + else + if test "x${je_cv_sys_pmadv_nr}" != "x" ; then + dnl Forcing experimental usage of process_madvise + AC_MSG_RESULT([Forcing usage of process_madvise with syscall nr=${je_cv_sys_pmadv_nr}]) + AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + AC_DEFINE_UNQUOTED([EXPERIMENTAL_SYS_PROCESS_MADVISE_NR], [${je_cv_sys_pmadv_nr}], [ ]) + fi fi else dnl Check for posix_madvise. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2e47438a..c7218c66 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -348,6 +348,8 @@ /* Defined if process_madvise(2) is available. */ #undef JEMALLOC_HAVE_PROCESS_MADVISE +#undef EXPERIMENTAL_SYS_PROCESS_MADVISE_NR + /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT diff --git a/src/pages.c b/src/pages.c index babfd50f..d53e0fef 100644 --- a/src/pages.c +++ b/src/pages.c @@ -640,10 +640,16 @@ init_process_madvise(void) { return false; } +#ifdef SYS_process_madvise +#define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise +#else +#define JE_SYS_PROCESS_MADVISE_NR EXPERIMENTAL_SYS_PROCESS_MADVISE_NR +#endif + static bool pages_purge_process_madvise_impl(void *vec, size_t vec_len, size_t total_bytes) { - size_t purged_bytes = (size_t)syscall(SYS_process_madvise, pidfd, + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); return purged_bytes != total_bytes; From 01e9ecbeb2fa69ae8e9f3e1013c9f7d44f6d033e Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 15 Apr 2025 23:50:43 -0700 Subject: [PATCH 283/395] Remove build-time configuration 'config_limit_usize_gap' --- .travis.yml | 228 +++++++++--------- configure.ac | 19 -- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/emap.h | 50 ++-- .../internal/jemalloc_internal_defs.h.in | 6 - .../jemalloc/internal/jemalloc_preamble.h.in | 8 - include/jemalloc/internal/sc.h | 2 +- include/jemalloc/internal/sz.h | 6 +- include/jemalloc/internal/tcache_types.h | 6 +- scripts/gen_travis.py | 1 - src/arena.c | 33 +-- src/ctl.c | 3 +- src/jemalloc.c | 34 +-- test/unit/arena_reset.c | 3 +- test/unit/mallctl.c | 2 +- 15 files changed, 160 insertions(+), 244 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5a83d757..433288cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,346 +12,346 @@ jobs: include: - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" 
EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: 
linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" 
CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-debug --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++
CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof 
--enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl 
--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc 
CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc 
CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/configure.ac b/configure.ac index f731e8b4..c615cab2 100644 --- a/configure.ac +++ b/configure.ac @@ -2757,24 +2757,6 @@ if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi -dnl ============================================================================ -dnl Limit the gap between two contiguous usizes to be at most PAGE. -AC_ARG_ENABLE([limit_usize_gap], - [AS_HELP_STRING([--enable-limit-usize-gap], - [Limit the gap between two contiguous usizes])], -[if test "x$limit_usize_gap" = "xno" ; then - limit_usize_gap="0" -else - limit_usize_gap="1" -fi -], -[limit_usize_gap="0"] -) -if test "x$limit_usize_gap" = "x1" ; then - AC_DEFINE([LIMIT_USIZE_GAP], [ ]) -fi -AC_SUBST([limit_usize_gap]) - dnl ============================================================================ dnl Check for glibc malloc hooks @@ -3050,5 +3032,4 @@ AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([tsan : ${enable_tsan}]) AC_MSG_RESULT([ubsan : ${enable_ubsan}]) -AC_MSG_RESULT([limit-usize-gap : ${limit_usize_gap}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 108493f2..4b765289 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -503,8 +503,7 @@ 
arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, assert(alloc_ctx.slab == edata_slab_get(edata)); emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); - assert(!config_limit_usize_gap || - emap_alloc_ctx_usize_get(&alloc_ctx) == + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == edata_usize_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5885daa6..06ed5d32 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -236,25 +236,16 @@ emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, size_t usize) { alloc_ctx->szind = szind; alloc_ctx->slab = slab; - /* - * When config_limit_usize_gap disabled, alloc_ctx->usize - * should not be accessed. - */ - if (config_limit_usize_gap) { - alloc_ctx->usize = usize; - assert(sz_limit_usize_gap_enabled() || - usize == sz_index2size(szind)); - } else if (config_debug) { - alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; - } + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); } JEMALLOC_ALWAYS_INLINE size_t emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->szind < SC_NSIZES); - if (!config_limit_usize_gap || alloc_ctx->slab) { - assert(!config_limit_usize_gap || - alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + if (alloc_ctx->slab) { + assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } assert(sz_limit_usize_gap_enabled() || @@ -269,28 +260,15 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - if (config_limit_usize_gap) { - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - /* - * If the alloc is invalid, do not calculate usize since edata - * could be corrupted. 
- */ - if (contents.metadata.szind == SC_NSIZES || - contents.edata == NULL) { - emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, 0); - return; - } - emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, edata_usize_get(contents.edata)); - } else { - rtree_metadata_t metadata = rtree_metadata_read(tsdn, - &emap->rtree, rtree_ctx, (uintptr_t)ptr); - /* alloc_ctx->usize will not be read/write in this case. */ - emap_alloc_ctx_init(alloc_ctx, metadata.szind, metadata.slab, - SC_LARGE_MAXCLASS + 1); - } + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + /* + * If the alloc is invalid, do not calculate usize since edata + * could be corrupted. + */ + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, (contents.metadata.szind == SC_NSIZES + || contents.edata == NULL)? 0: edata_usize_get(contents.edata)); } /* The pointer must be mapped. */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c7218c66..6d557959 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -480,12 +480,6 @@ /* If defined, use __int128 for optimization. */ #undef JEMALLOC_HAVE_INT128 -/* - * If defined, the gap between any two contiguous usizes should not exceed - * PAGE. 
- */ -#undef LIMIT_USIZE_GAP - #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index eba475a6..bbfe2513 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -283,12 +283,4 @@ static const bool have_memcntl = #endif ; -static const bool config_limit_usize_gap = -#ifdef LIMIT_USIZE_GAP - true -#else - false -#endif - ; - #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 098e47b7..3b9280d8 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -287,7 +287,7 @@ #endif /* - * When config_limit_usize_gap is enabled, the gaps between two contiguous + * When limit_usize_gap is enabled, the gaps between two contiguous * size classes should not exceed PAGE. This means there should be no concept * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). 
* However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 1122461c..6f161260 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -56,11 +56,7 @@ extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool sz_limit_usize_gap_enabled() { -#ifdef LIMIT_USIZE_GAP return opt_limit_usize_gap; -#else - return false; -#endif } JEMALLOC_ALWAYS_INLINE pszind_t @@ -356,7 +352,7 @@ sz_s2u_compute(size_t size) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { - assert(!config_limit_usize_gap || size < SC_LARGE_MINCLASS); + assert(size < SC_LARGE_MINCLASS); size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index f13ff748..b3828ecf 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,11 +19,7 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#ifdef LIMIT_USIZE_GAP - #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD -#else - #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ -#endif +#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fa98f2a2..d43c802e 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -191,7 +191,6 @@ def format_job(os, arch, combination): if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) - configure_flags.append('--enable-limit-usize-gap') if not compilers: compiler = GCC.value 
else: diff --git a/src/arena.c b/src/arena.c index 0a0c97ef..1586ee91 100644 --- a/src/arena.c +++ b/src/arena.c @@ -154,17 +154,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - if (config_limit_usize_gap) { - uint64_t active_bytes = locked_read_u64(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[i].active_bytes); - locked_inc_u64_unsynchronized( - &lstats[i].active_bytes, active_bytes); - astats->allocated_large += active_bytes; - } else { - astats->allocated_large += - curlextents * sz_index2size(SC_NBINS + i); - } + uint64_t active_bytes = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].active_bytes); + locked_inc_u64_unsynchronized( + &lstats[i].active_bytes, active_bytes); + astats->allocated_large += active_bytes; } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, @@ -333,11 +328,9 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); - if (config_limit_usize_gap) { - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); - } + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -361,11 +354,9 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); - if (config_limit_usize_gap) { - locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); - } + locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + 
&arena->stats.lstats[hindex].active_bytes, + usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } diff --git a/src/ctl.c b/src/ctl.c index 1d7eace6..49820af6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2355,8 +2355,7 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_CGEN(config_limit_usize_gap, opt_limit_usize_gap, opt_limit_usize_gap, - bool) +CTL_RO_NL_GEN(opt_limit_usize_gap, opt_limit_usize_gap, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, diff --git a/src/jemalloc.c b/src/jemalloc.c index 9451df77..445955b0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,12 +123,7 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -bool opt_limit_usize_gap = -#ifdef LIMIT_USIZE_GAP - true; -#else - false; -#endif +bool opt_limit_usize_gap = true; const char *const zero_realloc_mode_names[] = { "alloc", @@ -1785,10 +1780,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - if (config_limit_usize_gap) { - CONF_HANDLE_BOOL(opt_limit_usize_gap, - "limit_usize_gap"); - } + CONF_HANDLE_BOOL(opt_limit_usize_gap, + "limit_usize_gap"); CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR @@ -2209,17 +2202,16 @@ static bool malloc_init_hard(void) { tsd_t *tsd; - if (config_limit_usize_gap) { - assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); - assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); - /* - * This asserts an extreme case where TINY_MAXCLASS is larger - * than LARGE_MINCLASS. It could only happen if some constants - * are configured miserably wrong. 
- */ - assert(SC_LG_TINY_MAXCLASS <= - (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); - } + assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + /* + * This asserts an extreme case where TINY_MAXCLASS is larger + * than LARGE_MINCLASS. It could only happen if some constants + * are configured miserably wrong. + */ + assert(SC_LG_TINY_MAXCLASS <= + (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 09536b29..42fa9a5d 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,8 +78,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return config_limit_usize_gap? edata_usize_get(full_alloc_ctx.edata): - sz_index2size(full_alloc_ctx.szind); + return edata_usize_get(full_alloc_ctx.edata); } static unsigned diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 366b992b..7d4634e8 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,7 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); - TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); + TEST_MALLCTL_OPT(bool, limit_usize_gap, always); TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT From 8347f1045aaf975192b06c3168a40a05ae8c206a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 16 Apr 2025 11:57:55 -0700 Subject: [PATCH 284/395] Renaming limit_usize_gap to disable_large_size_classes --- include/jemalloc/internal/edata.h | 12 +++---- include/jemalloc/internal/emap.h | 4 +-- .../internal/jemalloc_internal_externs.h | 2 +- include/jemalloc/internal/sc.h | 10 +++--- include/jemalloc/internal/sz.h | 14 ++++---- src/ctl.c | 6 ++-- src/eset.c | 22 ++++++------- src/jemalloc.c | 19 
++++++++--- src/pac.c | 33 ++++++++++--------- src/prof_data.c | 2 +- src/psset.c | 2 +- src/sec.c | 2 +- src/stats.c | 2 +- test/test.sh.in | 2 +- test/unit/arena_decay.c | 4 +-- test/unit/mallctl.c | 2 +- test/unit/size_classes.c | 4 +-- test/unit/size_classes.sh | 4 +-- 18 files changed, 78 insertions(+), 68 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index b087ea31..e41e4efa 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -291,12 +291,12 @@ static inline size_t edata_usize_get(const edata_t *edata) { assert(edata != NULL); /* - * When sz_limit_usize_gap_enabled() is true, two cases: + * When sz_large_size_classes_disabled() is true, two cases: * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, * usize_from_size is accurate; * 2. otherwise, usize_from_ind is accurate. * - * When sz_limit_usize_gap_enabled() is not true, the two should be the + * When sz_large_size_classes_disabled() is not true, the two should be the * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. * * Note sampled small allocs will be promoted. Their extent size is @@ -316,9 +316,9 @@ edata_usize_get(const edata_t *edata) { } #endif - if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + if (!sz_large_size_classes_disabled() || szind < SC_NBINS) { size_t usize_from_ind = sz_index2size(szind); - if (!sz_limit_usize_gap_enabled() && + if (!sz_large_size_classes_disabled() && usize_from_ind >= SC_LARGE_MINCLASS) { size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); assert(size > sz_large_pad); @@ -332,8 +332,8 @@ edata_usize_get(const edata_t *edata) { assert(size > sz_large_pad); size_t usize_from_size = size - sz_large_pad; /* - * no matter limit-usize-gap enabled or not, usize retrieved from size - * is not accurate when smaller than SC_LARGE_MINCLASS. 
+ * no matter large size classes disabled or not, usize retrieved from + * size is not accurate when smaller than SC_LARGE_MINCLASS. */ assert(usize_from_size >= SC_LARGE_MINCLASS); return usize_from_size; diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 06ed5d32..fba46abe 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -237,7 +237,7 @@ emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, alloc_ctx->szind = szind; alloc_ctx->slab = slab; alloc_ctx->usize = usize; - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || usize == sz_index2size(szind)); } @@ -248,7 +248,7 @@ emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); return alloc_ctx->usize; diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 83a37baf..3b42f833 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,7 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; -extern bool opt_limit_usize_gap; +extern bool opt_disable_large_size_classes; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 3b9280d8..97956e7a 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -287,11 +287,11 @@ #endif /* - * When limit_usize_gap is enabled, the gaps between two contiguous - * size 
classes should not exceed PAGE. This means there should be no concept - of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). - However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and - 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * When large size classes are disabled, there is no concept of size classes + * for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). This ensures that + * the overhead between the usable size and the user request size will not + * exceed PAGE. Between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size classes also happen to be aligned with PAGE. * Since tcache relies on size classes to work and it greatly increases the * perf of allocs & deallocs, we extend the existence of size class to * 2 * SC_NGROUP * PAGE ONLY for the tcache module. This means for all other diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6f161260..e6cfa6a9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,8 +55,8 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool -sz_limit_usize_gap_enabled() { - return opt_limit_usize_gap; +sz_large_size_classes_disabled() { + return opt_disable_large_size_classes; } JEMALLOC_ALWAYS_INLINE pszind_t @@ -269,11 +269,11 @@ sz_index2size_unsafe(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); size_t size = sz_index2size_unsafe(index); /* - * With limit_usize_gap enabled, the usize above + * With large size classes disabled, the usize above * SC_LARGE_MINCLASS should grow by PAGE.
However, for sizes * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the * usize would not change because the size class gap in this @@ -285,7 +285,7 @@ sz_index2size(szind_t index) { * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here * instead of SC_LARGE_MINCLASS. */ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || size <= USIZE_GROW_SLOW_THRESHOLD); return size; } @@ -335,11 +335,11 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { + if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) { return sz_s2u_compute_using_delta(size); } else { /* - * With sz_limit_usize_gap_enabled() == true, usize of a large + * With sz_large_size_classes_disabled() == true, usize of a large * allocation is calculated by ceiling size to the smallest * multiple of PAGE to minimize the memory overhead, especially * when using hugepages. diff --git a/src/ctl.c b/src/ctl.c index 49820af6..92d254c1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -170,7 +170,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) -CTL_PROTO(opt_limit_usize_gap) +CTL_PROTO(opt_disable_large_size_classes) CTL_PROTO(opt_process_madvise_max_batch) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) @@ -564,7 +564,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, - {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, + {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2355,7 +2355,7 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, 
zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_GEN(opt_limit_usize_gap, opt_limit_usize_gap, bool) +CTL_RO_NL_GEN(opt_disable_large_size_classes, opt_disable_large_size_classes, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, diff --git a/src/eset.c b/src/eset.c index 7dc9cce7..677162ff 100644 --- a/src/eset.c +++ b/src/eset.c @@ -232,7 +232,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, /* See comments in eset_first_fit for why we enumerate search below. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + if (sz_large_size_classes_disabled() && pind != pind_prev) { edata_t *ret = NULL; ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, alignment); @@ -287,7 +287,7 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - if (sz_limit_usize_gap_enabled()) { + if (sz_large_size_classes_disabled()) { pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); return eset_enumerate_search(eset, size, pind_prev, @@ -300,28 +300,28 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, /* * Each element in the eset->bins is a heap corresponding to a size - * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * class. When sz_large_size_classes_disabled() is false, all heaps after * pind (including pind itself) will surely satisfy the rquests while * heaps before pind cannot satisfy the request because usize is * calculated based on size classes then. However, when - * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling - * user requested size to the closest multiple of PAGE. This means in - * the heap before pind, i.e., pind_prev, there may exist extents able - * to satisfy the request and we should enumerate the heap when - * pind_prev != pind. 
+ * sz_large_size_classes_disabled() is true, usize is calculated by + * ceiling user requested size to the closest multiple of PAGE. This + * means in the heap before pind, i.e., pind_prev, there may exist + * extents able to satisfy the request and we should enumerate the heap + * when pind_prev != pind. * * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, - * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * usize would be 1.25MB when sz_large_size_classes_disabled() is false. * pind points to the heap containing extents ranging in * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss - * any candidates. When sz_limit_usize_gap_enabled() is true, the + * any candidates. When sz_large_size_classes_disabled() is true, the * usize would be 1MB + 4KB and pind still points to the same heap. * In this case, the heap pind_prev points to, which contains extents * in the range [1MB, 1.25MB), may contain candidates satisfying the * usize and thus should be enumerated. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + if (sz_large_size_classes_disabled() && pind != pind_prev){ ret = eset_enumerate_search(eset, size, pind_prev, /* exact_only */ false, &ret_summ); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 445955b0..360635a8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,7 +123,12 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -bool opt_limit_usize_gap = true; +/* + * Disable large size classes is now the default behavior in jemalloc. + * Although it is configurable in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. 
+ */ +bool opt_disable_large_size_classes = true; const char *const zero_realloc_mode_names[] = { "alloc", @@ -1780,8 +1785,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - CONF_HANDLE_BOOL(opt_limit_usize_gap, - "limit_usize_gap"); + /* + * Disable large size classes is now the default + * behavior in jemalloc. Although it is configurable + * in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. + */ + CONF_HANDLE_BOOL(opt_disable_large_size_classes, + "disable_large_size_classes"); CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR @@ -2406,7 +2417,7 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_limit_usize_gap_enabled()? sz_s2u(size): + *usize = sz_large_size_classes_disabled()? sz_s2u(size): sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; diff --git a/src/pac.c b/src/pac.c index 12c1e444..e9ba7957 100644 --- a/src/pac.c +++ b/src/pac.c @@ -143,25 +143,26 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } /* - * We batched allocate a larger extent when limit_usize_gap is enabled + * We batched allocate a larger extent with large size classes disabled * because the reuse of extents in the dirty pool is worse without size - * classes for large allocs. For instance, when limit_usize_gap is not - * enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to - * 1.25MB and can reuse the same buffer if they are alloc & dalloc - * sequentially. However, with limit_usize_gap enabled, they cannot - * reuse the same buffer and their sequential allocs & dallocs will - * result in three different extents. Thus, we cache extra mergeable - * extents in the dirty pool to improve the reuse. 
We skip this - optimization if both maps_coalesce and opt_retain are disabled - because VM is not cheap enough to be used aggressively and extents - cannot be merged at will (only extents from the same VirtualAlloc - can be merged). Note that it could still be risky to cache more - extents when either mpas_coalesce or opt_retain is enabled. Yet - doing so is still beneficial in improving the reuse of extents - with some limits. This choice should be reevaluated if + classes for large allocs. For instance, when + disable_large_size_classes is false, 1.1MB, 1.15MB, and 1.2MB allocs + will all be ceiled to 1.25MB and can reuse the same buffer if they + are alloc & dalloc sequentially. However, with + disable_large_size_classes being true, they cannot reuse the same + buffer and their sequential allocs & dallocs will result in three + different extents. Thus, we cache extra mergeable extents in the + dirty pool to improve the reuse. We skip this optimization if both + maps_coalesce and opt_retain are disabled because VM is not cheap + enough in such cases to be used aggressively and extents cannot be + merged at will (only extents from the same VirtualAlloc can be + merged). Note that it could still be risky to cache more extents + when either maps_coalesce or opt_retain is enabled. Yet doing + so is still beneficial in improving the reuse of extents with some + limits. This choice should be reevaluated if
*/ - if (sz_limit_usize_gap_enabled() && edata == NULL && + if (sz_large_size_classes_disabled() && edata == NULL && (maps_coalesce || opt_retain)) { size_t batched_size = pac_alloc_retained_batched_size(size); /* diff --git a/src/prof_data.c b/src/prof_data.c index 437673ee..edc5c558 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -514,7 +514,7 @@ void prof_unbias_map_init(void) { #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { /* - * When limit_usize_gap is enabled, the unbiased calculation + * With large size classes disabled, the unbiased calculation * here is not as accurate as it was because usize now changes * in a finer grain while the unbiased_sz is still calculated * using the old way. diff --git a/src/psset.c b/src/psset.c index e617f426..97694301 100644 --- a/src/psset.c +++ b/src/psset.c @@ -368,7 +368,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { /* See comments in eset_first_fit for why we enumerate search below. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + if (sz_large_size_classes_disabled() && pind_prev < min_pind) { ps = psset_enumerate_search(psset, pind_prev, size); if (ps != NULL) { return ps; diff --git a/src/sec.c b/src/sec.c index 8827d1bd..67585a71 100644 --- a/src/sec.c +++ b/src/sec.c @@ -29,7 +29,7 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); diff --git a/src/stats.c b/src/stats.c index db9b9f43..d3127483 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1730,7 +1730,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") OPT_WRITE_SIZE_T("process_madvise_max_batch") - OPT_WRITE_BOOL("limit_usize_gap") + OPT_WRITE_BOOL("disable_large_size_classes") emitter_dict_end(emitter); /* Close "opt". */ diff --git a/test/test.sh.in b/test/test.sh.in index a4ee9396..dc13bc28 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,7 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ - limit_usize_gap=@limit_usize_gap@ \ + disable_large_size_classes=@disable_large_size_classes@ \ . @srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 00a38326..177ba505 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -411,11 +411,11 @@ TEST_BEGIN(test_decay_never) { size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); /* - * With limit_usize_gap enabled, some more extents + * With sz_large_size_classes_disabled() = true, some more extents * are cached in the dirty pool, making the assumption below * not true. 
*/ - if (!sz_limit_usize_gap_enabled()) { + if (!sz_large_size_classes_disabled()) { expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7d4634e8..cf9b88aa 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,7 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); - TEST_MALLCTL_OPT(bool, limit_usize_gap, always); + TEST_MALLCTL_OPT(bool, disable_large_size_classes, always); TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 24913803..c373829c 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + max_size_class = sz_large_size_classes_disabled()? 
SC_SMALL_MAXCLASS: get_max_size_class(); max_index = sz_size2index(max_size_class); @@ -81,7 +81,7 @@ TEST_BEGIN(test_size_classes) { TEST_END TEST_BEGIN(test_grow_slow_size_classes) { - test_skip_if(!sz_limit_usize_gap_enabled()); + test_skip_if(!sz_large_size_classes_disabled()); size_t size = SC_LARGE_MINCLASS; size_t target_usize = SC_LARGE_MINCLASS; diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh index 93d5e8d1..54363554 100644 --- a/test/unit/size_classes.sh +++ b/test/unit/size_classes.sh @@ -1,5 +1,3 @@ #!/bin/sh -if [ "x${limit_usize_gap}" = "x1" ] ; then - export MALLOC_CONF="limit_usize_gap:true" -fi +export MALLOC_CONF="disable_large_size_classes:true" From 37bf846cc38345947ff644bf47d7d51126353c09 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 2 May 2025 15:58:27 -0700 Subject: [PATCH 285/395] Fixes to prevent static analysis warnings. --- include/jemalloc/internal/hpa.h | 2 +- include/jemalloc/internal/sz.h | 2 +- src/eset.c | 4 ++-- src/hpa.c | 2 +- src/pac.c | 2 +- src/prof_sys.c | 2 +- src/psset.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index a384d04a..117c1c20 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -153,7 +153,7 @@ struct hpa_shard_s { peak_demand_t peak_demand; }; -bool hpa_hugepage_size_exceeds_limit(); +bool hpa_hugepage_size_exceeds_limit(void); /* * Whether or not the HPA can be used given the current configuration. 
This is * is not necessarily a guarantee that it backs its allocations by hugepages, diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index e6cfa6a9..3a32e232 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,7 +55,7 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool -sz_large_size_classes_disabled() { +sz_large_size_classes_disabled(void) { return opt_disable_large_size_classes; } diff --git a/src/eset.c b/src/eset.c index 677162ff..b4666e2c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,7 +155,7 @@ eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } -edata_t * +static edata_t * eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, size_t alignment) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { @@ -191,7 +191,7 @@ eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, return NULL; } -edata_t * +static edata_t * eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, bool exact_only, edata_cmp_summary_t *ret_summ) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { diff --git a/src/hpa.c b/src/hpa.c index 50614e42..9b7ff744 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -26,7 +26,7 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool -hpa_hugepage_size_exceeds_limit() { +hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; } diff --git a/src/pac.c b/src/pac.c index e9ba7957..0e435717 100644 --- a/src/pac.c +++ b/src/pac.c @@ -112,7 +112,7 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } -size_t pac_alloc_retained_batched_size(size_t size) { +static size_t pac_alloc_retained_batched_size(size_t size) { if (size > SC_LARGE_MAXCLASS) { /* * A valid input with 
usize SC_LARGE_MAXCLASS could still diff --git a/src/prof_sys.c b/src/prof_sys.c index 642d8c89..e3b7bbcb 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -586,7 +586,7 @@ prof_getpid(void) { } static long -prof_get_pid_namespace() { +prof_get_pid_namespace(void) { long ret = 0; #if defined(_WIN32) || defined(__APPLE__) diff --git a/src/psset.c b/src/psset.c index 97694301..afe9f1c1 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,7 +337,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } -hpdata_t * +static hpdata_t * psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { return NULL; From 3c14707b016b156c5f86dfd21304b01161c40750 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 25 Apr 2025 02:04:05 -0700 Subject: [PATCH 286/395] To improve reuse efficiency, the maximum coalesced size for large extents in the dirty ecache has been limited. This patch was tested with real workloads using ClickHouse (Clickbench Q35) on a system with 2x240 vCPUs. The results showed a 2X in query per second (QPS) performance and a reduction in page faults to 29% of the previous rate. Additionally, microbenchmark testing involved 256 memory reallocations resizing from 4KB to 16KB in one arena, which demonstrated a 5X performance improvement. 
Signed-off-by: Jiebin Sun --- src/extent.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/src/extent.c b/src/extent.c index 3425e1ce..03a3fdd8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -888,7 +888,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced) { + ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { assert(!edata_guarded_get(edata)); assert(coalesced != NULL); *coalesced = false; @@ -908,7 +908,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce forward. */ edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); - if (next != NULL) { + size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + if (next != NULL && edata_size_get(next) <= max_next_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, next, true)) { if (ecache->delay_coalesce) { @@ -923,7 +924,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce backward. */ edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); - if (prev != NULL) { + size_t max_prev_neighbor = max_size > edata_size_get(edata) ? 
max_size - edata_size_get(edata) : 0; + if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, prev, false)) { edata = prev; @@ -947,14 +949,14 @@ static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced); + SC_LARGE_MAXCLASS, coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced) { + ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced); + max_size, coalesced); } /* Purge a single extent to retained / unmapped directly. */ @@ -1004,11 +1006,35 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ + /** + * Maximum size limit (max_size) for large extents waiting to be coalesced + * in dirty ecache. + * + * When set to a non-zero value, this parameter restricts the maximum size + * of large extents after coalescing. If the combined size of two extents + * would exceed this threshold, the coalescing operation is skipped. + * + * This improves dirty ecache reuse efficiency by: + * - Maintaining appropriately sized extents that match common allocation requests + * - Limiting large extent coalescence to prevent overly large extents that are + * less likely to be reused efficiently + * - Setting lg_max_coalesce for large extent merging scenarios, similar to how + * lg_max_fit is used during extent reuse + * + * Note that during extent decay/purge operations, no coalescing restrictions + * are applied to dirty ecache despite the delay_coalesce setting. 
This ensures + * that while improving dirty ecache reuse efficiency, we don't compromise + * the final coalescing that happens during the transition from dirty ecache + * to muzzy/retained ecache states. + */ + unsigned lg_max_coalesce = (unsigned)opt_lg_extent_max_active_fit; + size_t edata_size = edata_size_get(edata); + size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) > edata_size ? (edata_size << lg_max_coalesce) : SC_LARGE_MAXCLASS; bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, - ecache, edata, &coalesced); + ecache, edata, max_size, &coalesced); } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) From 3cee771cfa1d3b0df5bab166fdcb654e60cca9bd Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 17 Apr 2025 15:29:34 -0700 Subject: [PATCH 287/395] Modify .clang-format to make it more aligned with current freebsd style --- .clang-format | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.clang-format b/.clang-format index 719c03c5..a890af4d 100644 --- a/.clang-format +++ b/.clang-format @@ -20,16 +20,16 @@ AlwaysBreakBeforeMultilineStrings: true BinPackArguments: true BinPackParameters: true BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true IndentBraces: false # BreakAfterJavaFieldAnnotations: true BreakBeforeBinaryOperators: NonAssignment @@ -43,7 +43,7 @@ ColumnLimit: 80 # CompactNamespaces: true # ConstructorInitializerAllOnOneLineOrOnePerLine: true # 
ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 2 +ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false @@ -57,7 +57,7 @@ ForEachMacros: [ ql_foreach, qr_foreach, ] # IncludeIsMainRegex: '' IndentCaseLabels: false IndentPPDirectives: AfterHash -IndentWidth: 4 +IndentWidth: 8 IndentWrappedFunctionNames: false # JavaImportGroups: [] # JavaScriptQuotes: Leave @@ -73,8 +73,8 @@ MaxEmptyLinesToKeep: 1 # ObjCSpaceAfterProperty: false # ObjCSpaceBeforeProtocolList: false -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakAssignment: 100 +PenaltyBreakBeforeFirstCallParameter: 100 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 @@ -96,7 +96,7 @@ PointerAlignment: Right # - 'cpp' # BasedOnStyle: llvm # CanonicalDelimiter: 'cc' -ReflowComments: true +ReflowComments: false SortIncludes: false SpaceAfterCStyleCast: false # SpaceAfterTemplateKeyword: true @@ -107,7 +107,7 @@ SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements # SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 +SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInCStyleCastParentheses: false # SpacesInContainerLiterals: false @@ -118,5 +118,5 @@ SpacesInSquareBrackets: false # used by some of the core jemalloc developers. # StatementMacros: [] TabWidth: 8 -UseTab: Never +UseTab: ForIndentation ... 
From 554185356bf990155df8d72060c4efe993642baf Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 18 Apr 2025 11:45:57 -0700 Subject: [PATCH 288/395] Sample format on tcache_max test --- test/unit/tcache_max.c | 151 +++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 80 deletions(-) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index a64fca71..884ee7fe 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -3,12 +3,7 @@ const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; -enum { - alloc_option_start = 0, - use_malloc = 0, - use_mallocx, - alloc_option_end -}; +enum { alloc_option_start = 0, use_malloc = 0, use_mallocx, alloc_option_end }; enum { dalloc_option_start = 0, @@ -59,14 +54,15 @@ dalloc_func(void *ptr, size_t sz, unsigned dalloc_option) { static size_t tcache_bytes_read_global(void) { uint64_t epoch; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); size_t tcache_bytes; size_t sz = sizeof(tcache_bytes); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return tcache_bytes; } @@ -88,8 +84,8 @@ tcache_bytes_read_local(void) { } static void tcache_bytes_check_update(size_t *prev, ssize_t diff) { - size_t tcache_bytes = global_test ? tcache_bytes_read_global(): - tcache_bytes_read_local(); + size_t tcache_bytes = global_test ? 
tcache_bytes_read_global() + : tcache_bytes_read_local(); expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); *prev += diff; } @@ -108,8 +104,8 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, void *ptr1 = alloc_func(alloc_size, alloc_option); void *ptr2 = alloc_func(alloc_size, alloc_option); - size_t bytes = global_test ? tcache_bytes_read_global() : - tcache_bytes_read_local(); + size_t bytes = global_test ? tcache_bytes_read_global() + : tcache_bytes_read_local(); dalloc_func(ptr2, alloc_size, dalloc_option); /* Expect tcache_bytes increase after dalloc */ tcache_bytes_check_update(&bytes, diff); @@ -139,48 +135,48 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, } static void -test_tcache_max_impl(size_t target_tcache_max, unsigned alloc_option, - unsigned dalloc_option) { +test_tcache_max_impl( + size_t target_tcache_max, unsigned alloc_option, unsigned dalloc_option) { size_t tcache_max, sz; sz = sizeof(tcache_max); if (global_test) { assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); expect_zu_eq(tcache_max, target_tcache_max, "Global tcache_max not expected"); } else { - assert_d_eq(mallctl("thread.tcache.max", - (void *)&tcache_max, &sz, NULL,.0), 0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&tcache_max, + &sz, NULL, .0), + 0, "Unexpected.mallctl().failure"); expect_zu_eq(tcache_max, target_tcache_max, "Current thread's tcache_max not expected"); } test_tcache_bytes_alloc(1, tcache_max, alloc_option, dalloc_option); - test_tcache_bytes_alloc(tcache_max - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(tcache_max, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(tcache_max + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + tcache_max - 1, tcache_max, alloc_option, 
dalloc_option); + test_tcache_bytes_alloc( + tcache_max, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + tcache_max + 1, tcache_max, alloc_option, dalloc_option); - test_tcache_bytes_alloc(PAGE - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(PAGE + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + PAGE - 1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + PAGE + 1, tcache_max, alloc_option, dalloc_option); size_t large; sz = sizeof(large); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + assert_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); - test_tcache_bytes_alloc(large - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(large, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(large + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + large - 1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(large, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + large + 1, tcache_max, alloc_option, dalloc_option); } TEST_BEGIN(test_tcache_max) { @@ -193,19 +189,17 @@ TEST_BEGIN(test_tcache_max) { size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, - sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); global_test = true; - for (alloc_option = alloc_option_start; - alloc_option < alloc_option_end; + for (alloc_option = 
alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { for (dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; - dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { /* opt.tcache_max set to 1024 in tcache_max.sh. */ - test_tcache_max_impl(1024, alloc_option, - dalloc_option); + test_tcache_max_impl(1024, alloc_option, dalloc_option); } } global_test = false; @@ -229,8 +223,9 @@ validate_tcache_stack(tcache_t *tcache) { bool found = false; do { base_block_t *block = next; - if ((byte_t *)tcache_stack >= (byte_t *)block && - (byte_t *)tcache_stack < ((byte_t *)block + block->size)) { + if ((byte_t *)tcache_stack >= (byte_t *)block + && (byte_t *)tcache_stack + < ((byte_t *)block + block->size)) { found = true; break; } @@ -271,42 +266,42 @@ tcache_check(void *arg) { bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_true(e1, "Unexpected previous tcache state"); size_t temp_tcache_max = TCACHE_MAXCLASS_LIMIT - 1; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&temp_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&temp_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); expect_zu_eq(tcache_nbins, TCACHE_NBINS_MAX, "Unexpected value for tcache_nbins"); - assert_d_eq(mallctl("thread.tcache.max", - (void *)&old_tcache_max, &sz, - (void *)&min_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, + (void *)&min_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); 
expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); /* Enable tcache, the set should still be valid. */ e0 = true; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); min_tcache_max = sz_s2u(min_tcache_max); expect_zu_eq(tcache_max_get(tcache_slow), min_tcache_max, "Unexpected value for tcache_max"); expect_zu_eq(tcache_nbins_get(tcache_slow), tcache_max2nbins(min_tcache_max), "Unexpected value for nbins"); - assert_d_eq(mallctl("thread.tcache.max", - (void *)&old_tcache_max, &sz, - (void *)&new_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); - expect_zu_eq(old_tcache_max, min_tcache_max, - "Unexpected value for tcache_max"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, + (void *)&new_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); + expect_zu_eq( + old_tcache_max, min_tcache_max, "Unexpected value for tcache_max"); validate_tcache_stack(tcache); /* @@ -317,19 +312,17 @@ tcache_check(void *arg) { new_tcache_max = TCACHE_MAXCLASS_LIMIT; } old_tcache_max = tcache_max_get(tcache_slow); - expect_zu_eq(old_tcache_max, new_tcache_max, - "Unexpected value for tcache_max"); + expect_zu_eq( + old_tcache_max, new_tcache_max, "Unexpected value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; - alloc_option < alloc_option_end; - alloc_option++) { + alloc_option < alloc_option_end; alloc_option++) { for (unsigned dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; - dalloc_option++) { - test_tcache_max_impl(new_tcache_max, - alloc_option, dalloc_option); + dalloc_option < dalloc_option_end; dalloc_option++) { + test_tcache_max_impl( 
+ new_tcache_max, alloc_option, dalloc_option); } validate_tcache_stack(tcache); } @@ -348,14 +341,14 @@ TEST_BEGIN(test_thread_tcache_max) { VARIABLE_ARRAY(thd_t, threads, nthreads); VARIABLE_ARRAY(size_t, all_threads_tcache_max, nthreads); for (unsigned i = 0; i < nthreads; i++) { - all_threads_tcache_max[i] = 1024 * (1<<((i + 10) % 20)); + all_threads_tcache_max[i] = 1024 * (1 << ((i + 10) % 20)); if (i == nthreads - 1) { all_threads_tcache_max[i] = UINT_MAX; } } for (unsigned i = 0; i < nthreads; i++) { - thd_create(&threads[i], tcache_check, - &(all_threads_tcache_max[i])); + thd_create( + &threads[i], tcache_check, &(all_threads_tcache_max[i])); } for (unsigned i = 0; i < nthreads; i++) { thd_join(threads[i], NULL); @@ -365,7 +358,5 @@ TEST_END int main(void) { - return test( - test_tcache_max, - test_thread_tcache_max); + return test(test_tcache_max, test_thread_tcache_max); } From b6338c4ff6cd1a95b4f956a9028df420ad6f4761 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 19 May 2025 15:17:35 -0700 Subject: [PATCH 289/395] EASY - be explicit in non-vectorized hpa tests --- test/unit/hpa.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 test/unit/hpa.sh diff --git a/test/unit/hpa.sh b/test/unit/hpa.sh new file mode 100644 index 00000000..fe0e0b67 --- /dev/null +++ b/test/unit/hpa.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0" From f19a569216e829fcd646191277374387e8291c62 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 19 May 2025 15:15:49 -0700 Subject: [PATCH 290/395] Ignore formatting commit in blame. 
--- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..365e8bb1 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +554185356bf990155df8d72060c4efe993642baf From 9169e9272a9fb123702e04c77ff5326f29818f70 Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Tue, 13 May 2025 11:01:36 +0800 Subject: [PATCH 291/395] Fix: Adjust CACHE_BIN_NFLUSH_BATCH_MAX size to prevent assert failures The maximum allowed value for `nflush_batch` is `CACHE_BIN_NFLUSH_BATCH_MAX`. However, `tcache_bin_flush_impl_small` could potentially declare an array of `emap_batch_lookup_result_t` of size `CACHE_BIN_NFLUSH_BATCH_MAX + 1`. This leads to a `VARIABLE_ARRAY` assertion failure, observed when `tcache_nslots_small_max` is configured to 2048. This patch ensures the array size does not exceed the allowed maximum. Signed-off-by: Xin Yang --- include/jemalloc/internal/cache_bin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cb137af9..3ca7e287 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -616,7 +616,7 @@ struct cache_bin_fill_ctl_s { * This is to avoid stack overflow when we do batch edata look up, which * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. */ -#define CACHE_BIN_NFLUSH_BATCH_MAX (VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) +#define CACHE_BIN_NFLUSH_BATCH_MAX ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) /* * Filling and flushing are done in batch, on arrays of void *s. 
For filling, From 5e460bfea25c39d9bf8ea0077c3b6740e9515487 Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Tue, 13 May 2025 11:25:13 +0800 Subject: [PATCH 292/395] Refactor: use the cache_bin_sz_t typedef instead of direct uint16_t any future changes to the underlying data type for bin sizes (such as upgrading from `uint16_t` to `uint32_t`) can be achieved by modifying only the `cache_bin_sz_t` definition. Signed-off-by: Xin Yang --- include/jemalloc/internal/cache_bin.h | 42 +++++++++++++-------------- src/cache_bin.c | 10 +++---- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 3ca7e287..7ab48dc9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -108,7 +108,7 @@ struct cache_bin_s { * Since the stack grows down, this is a higher address than * low_bits_full. */ - uint16_t low_bits_low_water; + cache_bin_sz_t low_bits_low_water; /* * The low bits of the value that stack_head will take on when the array @@ -119,7 +119,7 @@ struct cache_bin_s { * Recall that since the stack grows down, this is the lowest available * address in the array for caching. Only adjusted when stashing items. */ - uint16_t low_bits_full; + cache_bin_sz_t low_bits_full; /* * The low bits of the value that stack_head will take on when the array @@ -128,7 +128,7 @@ struct cache_bin_s { * The stack grows down -- this is one past the highest address in the * array. Immutable after initialization. */ - uint16_t low_bits_empty; + cache_bin_sz_t low_bits_empty; /* The maximum number of cached items in the bin. */ cache_bin_info_t bin_info; @@ -222,7 +222,7 @@ cache_bin_ncached_max_get(cache_bin_t *bin) { * with later. 
*/ static inline void -cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { +cache_bin_assert_earlier(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { if (earlier > later) { assert(bin->low_bits_full > bin->low_bits_empty); } @@ -235,7 +235,7 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline cache_bin_sz_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { +cache_bin_diff(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { cache_bin_assert_earlier(bin, earlier, later); return later - earlier; } @@ -246,7 +246,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { static inline cache_bin_sz_t cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -284,7 +284,7 @@ cache_bin_ncached_get_local(cache_bin_t *bin) { static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); byte_t *empty_bits = (byte_t *)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -303,9 +303,9 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * multithreaded environment. Currently concurrent access happens only during * arena statistics collection. 
*/ -static inline uint16_t +static inline cache_bin_sz_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { - return (uint16_t)bin->low_bits_empty - + return (cache_bin_sz_t)bin->low_bits_empty - cache_bin_ncached_max_get(bin) * sizeof(void *); } @@ -351,7 +351,7 @@ cache_bin_low_water_get(cache_bin_t *bin) { assert(low_water <= cache_bin_ncached_max_get(bin)); assert(low_water <= cache_bin_ncached_get_local(bin)); - cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, + cache_bin_assert_earlier(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); return low_water; @@ -364,7 +364,7 @@ cache_bin_low_water_get(cache_bin_t *bin) { static inline void cache_bin_low_water_set(cache_bin_t *bin) { assert(!cache_bin_disabled(bin)); - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head; } static inline void @@ -391,7 +391,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * be used. It's safe because the stack has one more slot reserved. 
*/ void *ret = *bin->stack_head; - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; void **new_head = bin->stack_head + 1; /* @@ -414,7 +414,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { */ if (likely(low_bits != bin->low_bits_empty)) { bin->stack_head = new_head; - bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; return ret; } @@ -455,7 +455,7 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { JEMALLOC_ALWAYS_INLINE bool cache_bin_full(cache_bin_t *bin) { - return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); + return ((cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } /* @@ -503,7 +503,7 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { bin->stack_head--; *bin->stack_head = ptr; cache_bin_assert_earlier(bin, bin->low_bits_full, - (uint16_t)(uintptr_t)bin->stack_head); + (cache_bin_sz_t)(uintptr_t)bin->stack_head); return true; } @@ -516,9 +516,9 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } /* Stash at the full position, in the [full, head) range. */ - uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. 
*/ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + cache_bin_sz_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -532,7 +532,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); @@ -541,7 +541,7 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin) { /* Below are for assertions only. */ void **low_bound = cache_bin_low_bound_get(bin); - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + assert((cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET @@ -586,12 +586,12 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. */ cache_bin_sz_t diff = bin->low_bits_empty - - (uint16_t)(uintptr_t)bin->stack_head; + (cache_bin_sz_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. 
*/ - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* diff --git a/src/cache_bin.c b/src/cache_bin.c index 6438705f..2f5afeb9 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -85,19 +85,19 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, */ void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); void *full_position = stack_cur; - uint16_t bin_stack_size = info->ncached_max * sizeof(void *); + cache_bin_sz_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; void *empty_position = (void *)((byte_t *)alloc + *cur_offset); /* Init to the empty position. */ bin->stack_head = (void **)empty_position; - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; - bin->low_bits_full = (uint16_t)(uintptr_t)full_position; - bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + bin->low_bits_full = (cache_bin_sz_t)(uintptr_t)full_position; + bin->low_bits_empty = (cache_bin_sz_t)(uintptr_t)empty_position; cache_bin_info_init(&bin->bin_info, info->ncached_max); cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); + bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { assert(cache_bin_ncached_get_local(bin) == 0); From fd60645260b74645cd606bb6a48464890ab39dee Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 7 May 2025 16:37:26 -0700 Subject: [PATCH 293/395] Add one more check to double free validation. 
--- include/jemalloc/internal/arena_inlines_b.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 4b765289..61008b59 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,7 +68,8 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { "possibly caused by double free bugs.", ptr); return true; } - if (unlikely(input_size != edata_usize_get(edata))) { + if (unlikely(input_size != edata_usize_get(edata) || + input_size > SC_LARGE_MAXCLASS)) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); return true; From 1818170c8d4de24e0f3b67d7e0347b4ea8e2fec1 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 7 May 2025 17:39:20 -0700 Subject: [PATCH 294/395] Fix binshard.sh by specifying bin_shards for all sizes. --- test/unit/binshard.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/binshard.sh b/test/unit/binshard.sh index c1d58c88..1882e90a 100644 --- a/test/unit/binshard.sh +++ b/test/unit/binshard.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8" +export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8|513-8070450532247928832:1" From 4531411abed6bc4e6740bdaa21abafe02960ee51 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 20 May 2025 11:58:26 -0700 Subject: [PATCH 295/395] Modify .clang-format to have declarations aligned --- .clang-format | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-format b/.clang-format index a890af4d..527ec375 100644 --- a/.clang-format +++ b/.clang-format @@ -4,10 +4,10 @@ # AccessModifierOffset: -2 AlignAfterOpenBracket: DontAlign AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false +AlignConsecutiveDeclarations: true 
AlignEscapedNewlines: Right AlignOperands: false -AlignTrailingComments: false +AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false From edaab8b3ad752a845019985062689551cd6315c1 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 22 May 2025 22:59:00 -0700 Subject: [PATCH 296/395] Turn clang-format off for codes with multi-line commands in macros --- include/jemalloc/internal/atomic_c11.h | 2 ++ include/jemalloc/internal/atomic_gcc_sync.h | 2 ++ include/jemalloc/internal/atomic_msvc.h | 2 ++ include/jemalloc/internal/bit_util.h | 6 ++++++ include/jemalloc/internal/hpa_opts.h | 2 ++ include/jemalloc/internal/mpsc_queue.h | 2 ++ include/jemalloc/internal/rb.h | 2 ++ include/jemalloc/internal/seq.h | 2 ++ include/jemalloc/internal/tsd.h | 2 ++ test/unit/atomic.c | 2 ++ test/unit/bit_util.c | 2 ++ 11 files changed, 26 insertions(+) diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index 74173b03..a37e9661 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -15,6 +15,7 @@ #define atomic_fence atomic_thread_fence +/* clang-format off */ #define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ /* unused */ lg_size) \ typedef _Atomic(type) atomic_##short_type##_t; \ @@ -59,6 +60,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ return atomic_compare_exchange_strong_explicit(a, expected, \ desired, success_mo, failure_mo); \ } +/* clang-format on */ /* * Integral types have some special operations available that non-integral ones diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 21136bd0..801d6197 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -87,6 +87,7 @@ atomic_post_sc_store_fence() { } +/* clang-format off */ #define 
JEMALLOC_GENERATE_ATOMICS(type, short_type, \ /* unused */ lg_size) \ typedef struct { \ @@ -161,6 +162,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ return false; \ } \ } +/* clang-format on */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ /* unused */ lg_size) \ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index a429f1ab..5313aed9 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -118,6 +118,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ } +/* clang-format off */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ \ @@ -158,6 +159,7 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ } +/* clang-format on */ #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c413a75d..840dbde2 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -35,6 +35,7 @@ ffs_u(unsigned x) { return JEMALLOC_INTERNAL_FFS(x) - 1; } +/* clang-format off */ #define DO_FLS_SLOW(x, suffix) do { \ util_assume(x != 0); \ x |= (x >> 1); \ @@ -58,6 +59,7 @@ ffs_u(unsigned x) { } \ return ffs_##suffix(x) - 1; \ } while(0) +/* clang-format on */ static inline unsigned fls_llu_slow(unsigned long long x) { @@ -118,6 +120,7 @@ fls_u(unsigned x) { #define DO_BSR64(bit, x) bit = 0; unreachable() #endif +/* clang-format off */ #define DO_FLS(x) do { \ if (x == 0) { \ return 8 * sizeof(x); \ @@ -144,6 +147,7 @@ fls_u(unsigned x) { } \ unreachable(); \ } while (0) +/* clang-format on */ static inline unsigned fls_llu(unsigned long long x) { @@ -184,6 +188,7 @@ fls_u(unsigned x) { # error "Haven't implemented popcount for 16-byte 
ints." #endif +/* clang-format off */ #define DO_POPCOUNT(x, type) do { \ /* \ * Algorithm from an old AMD optimization reference manual. \ @@ -227,6 +232,7 @@ fls_u(unsigned x) { x >>= ((sizeof(x) - 1) * 8); \ return (unsigned)x; \ } while(0) +/* clang-format on */ static inline unsigned popcount_u_slow(unsigned bitmap) { diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 816bb577..e5517719 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -69,6 +69,7 @@ struct hpa_shard_opts_s { uint64_t peak_demand_window_ms; }; +/* clang-format off */ #define HPA_SHARD_OPTS_DEFAULT { \ /* slab_max_alloc */ \ 64 * 1024, \ @@ -95,5 +96,6 @@ struct hpa_shard_opts_s { /* peak_demand_window_ms */ \ 0 \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h index d8aa624b..86f4898f 100644 --- a/include/jemalloc/internal/mpsc_queue.h +++ b/include/jemalloc/internal/mpsc_queue.h @@ -26,6 +26,7 @@ * two-stack tricks reverses orders in the lock-free first stack). */ +/* clang-format off */ #define mpsc_queue(a_type) \ struct { \ atomic_p_t tail; \ @@ -131,5 +132,6 @@ a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) { \ } \ ql_concat(dst, &reversed, a_link); \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 5f2771a9..235d548e 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -38,6 +38,7 @@ */ #define RB_MAX_DEPTH (sizeof(void *) << 4) +/* clang-format off */ #ifdef RB_COMPACT /* Node structure. 
*/ #define rb_node(a_type) \ @@ -1871,5 +1872,6 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ return ret; \ } \ ) /* end rb_summarized_only */ +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h index 9bb6b235..d2c0d1fc 100644 --- a/include/jemalloc/internal/seq.h +++ b/include/jemalloc/internal/seq.h @@ -8,6 +8,7 @@ * A simple seqlock implementation. */ +/* clang-format off */ #define seq_define(type, short_type) \ typedef struct { \ atomic_zu_t seq; \ @@ -52,5 +53,6 @@ seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \ memcpy(dst, buf, sizeof(type)); \ return true; \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_SEQ_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4f22dcff..c06605df 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -32,6 +32,7 @@ TSD_DATA_FAST TSD_DATA_SLOWER #undef O +/* clang-format off */ /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ @@ -48,6 +49,7 @@ tsd_##n##p_get(tsd_t *tsd) { \ state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } +/* clang-format on */ TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER diff --git a/test/unit/atomic.c b/test/unit/atomic.c index c2ec8c7e..6c4b85e5 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -15,6 +15,7 @@ * and val3 for desired. 
*/ +/* clang-format off */ #define DO_TESTS(t, ta, val1, val2, val3) do { \ t val; \ t expected; \ @@ -174,6 +175,7 @@ typedef struct { \ DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \ } \ } while (0) +/* clang-format on */ TEST_STRUCT(uint64_t, u64); TEST_BEGIN(test_atomic_u64) { diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 295abb1b..4e9d2e16 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -226,6 +226,7 @@ expand_byte_to_mask(unsigned byte) { return result; } +/* clang-format off */ #define TEST_POPCOUNT(t, suf, pri_hex) do { \ t bmul = (t)0x0101010101010101ULL; \ for (unsigned i = 0; i < (1 << sizeof(t)); i++) { \ @@ -245,6 +246,7 @@ expand_byte_to_mask(unsigned byte) { } \ } \ } while (0) +/* clang-format on */ TEST_BEGIN(test_popcount_u) { TEST_POPCOUNT(unsigned, u, "x"); From 27d7960cf9b48a9a9395661f212d05a471dceed4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 May 2025 21:09:01 -0700 Subject: [PATCH 297/395] Revert "Extend purging algorithm with peak demand tracking" This reverts commit ad108d50f1c30700389103ff5fe3ef5f538f804c. 
--- Makefile.in | 2 - include/jemalloc/internal/hpa.h | 4 - include/jemalloc/internal/hpa_opts.h | 14 +- include/jemalloc/internal/peak_demand.h | 55 ------ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/ctl.c | 5 - src/hpa.c | 53 +----- src/jemalloc.c | 5 - src/peak_demand.c | 74 -------- src/stats.c | 1 - test/unit/hpa.c | 174 +++--------------- test/unit/mallctl.c | 1 - test/unit/peak_demand.c | 162 ---------------- 20 files changed, 28 insertions(+), 538 deletions(-) delete mode 100644 include/jemalloc/internal/peak_demand.h delete mode 100644 src/peak_demand.c delete mode 100644 test/unit/peak_demand.c diff --git a/Makefile.in b/Makefile.in index ac8c51ff..7085a22a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,7 +137,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ - $(srcroot)src/peak_demand.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ @@ -255,7 +254,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ - $(srcroot)test/unit/peak_demand.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 117c1c20..2e9fccc2 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -10,7 +10,6 @@ #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" -#include "jemalloc/internal/peak_demand.h" #include "jemalloc/internal/psset.h" typedef struct 
hpa_central_s hpa_central_t; @@ -148,9 +147,6 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; - - /* Peak active memory sliding window statistics. */ - peak_demand_t peak_demand; }; bool hpa_hugepage_size_exceeds_limit(void); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index e5517719..9e7f76ac 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -27,8 +27,7 @@ struct hpa_shard_opts_s { /* * The HPA purges whenever the number of pages exceeds dirty_mult * - * peak_active_pages. This may be set to (fxp_t)-1 to disable - * purging. + * active_pages. This may be set to (fxp_t)-1 to disable purging. */ fxp_t dirty_mult; @@ -60,13 +59,6 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. */ ssize_t experimental_max_purge_nhp; - - /* - * Sliding window duration to track active memory demand statistics. - * This might be set to 0, to disable sliding window statistics - * tracking and use current number of active pages for purging instead. - */ - uint64_t peak_demand_window_ms; }; /* clang-format off */ @@ -92,9 +84,7 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1, \ - /* peak_demand_window_ms */ \ - 0 \ + -1 \ } /* clang-format on */ diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h deleted file mode 100644 index 2664cbec..00000000 --- a/include/jemalloc/internal/peak_demand.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H -#define JEMALLOC_INTERNAL_PEAK_DEMAND_H - -#include "jemalloc/internal/jemalloc_preamble.h" - -/* - * Implementation of peak active memory demand tracking. - * - * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware - * memory allocator" whitepaper. 
- * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf - * - * End goal is to track peak active memory usage over specified time interval. - * We do so by dividing this time interval into disjoint subintervals and - * storing value of maximum memory usage for each subinterval in a circular - * buffer. Nanoseconds resolution timestamp uniquely maps into epoch, which is - * used as an index to access circular buffer. - */ - -#define PEAK_DEMAND_LG_BUCKETS 4 -/* - * Number of buckets should be power of 2 to ensure modulo operation is - * optimized to bit masking by the compiler. - */ -#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS) - -typedef struct peak_demand_s peak_demand_t; -struct peak_demand_s { - /* - * Absolute value of current epoch, monotonically increases over time. Epoch - * value modulo number of buckets used as an index to access nactive_max - * array. - */ - uint64_t epoch; - - /* How many nanoseconds each epoch approximately takes. */ - uint64_t epoch_interval_ns; - - /* - * Circular buffer to track maximum number of active pages for each - * epoch. - */ - size_t nactive_max[PEAK_DEMAND_NBUCKETS]; -}; - -void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms); - -/* Updates peak demand statistics with current number of active pages. */ -void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, - size_t nactive); - -/* Returns maximum number of active pages in sliding window. 
*/ -size_t peak_demand_nactive_max(peak_demand_t *peak_demand); - -#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 97a95fbf..c43b30b1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 8529438c..a195f6b3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index eace48ba..cd16005d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 98085cfd..2d8c4be6 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/src/ctl.c b/src/ctl.c index 92d254c1..a30adc52 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,7 +106,6 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) -CTL_PROTO(opt_hpa_peak_demand_window_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -489,8 +488,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, - {NAME("hpa_peak_demand_window_ms"), - CTL(opt_hpa_peak_demand_window_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2260,8 +2257,6 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) -CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms, - opt_hpa_opts.peak_demand_window_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index 9b7ff744..48e356c6 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -64,11 +64,6 @@ hpa_supported(void) { 
return true; } -static bool -hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) { - return shard->opts.peak_demand_window_ms > 0; -} - static void hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); @@ -223,11 +218,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; - if (hpa_peak_demand_tracking_enabled(shard)) { - peak_demand_init(&shard->peak_demand, - shard->opts.peak_demand_window_ms); - } - /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just @@ -305,37 +295,8 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - /* - * We are trying to estimate maximum amount of active memory we'll - * need in the near future. We do so by projecting future active - * memory demand (based on peak active memory usage we observed in the - * past within sliding window) and adding slack on top of it (an - * overhead is reasonable to have in exchange of higher hugepages - * coverage). When peak demand tracking is off, projection of future - * active memory is active memory we are having right now. - * - * Estimation is essentially the same as nactive_max * (1 + - * dirty_mult), but expressed differently to factor in necessary - * implementation details. - */ - size_t nactive = psset_nactive(&shard->psset); - size_t nactive_max = nactive; - if (hpa_peak_demand_tracking_enabled(shard)) { - /* - * We release shard->mtx, when we do a syscall to purge dirty - * memory, so someone might grab shard->mtx, allocate memory - * from this shard and update psset's nactive counter, before - * peak_demand_update(...) was called and we'll get - * peak_demand_nactive_max(...) <= nactive as a result. 
- */ - size_t peak = peak_demand_nactive_max(&shard->peak_demand); - if (peak > nactive_max) { - nactive_max = peak; - } - } - size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult); - size_t estimation = nactive_max + slack; - return estimation - nactive; + return fxp_mul_frac(psset_nactive(&shard->psset), + shard->opts.dirty_mult); } static bool @@ -711,16 +672,6 @@ static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - - /* Update active memory demand statistics. */ - if (hpa_peak_demand_tracking_enabled(shard)) { - nstime_t now; - shard->central->hooks.curtime(&now, - /* first_reading */ true); - peak_demand_update(&shard->peak_demand, &now, - psset_nactive(&shard->psset)); - } - if (!forced && shard->opts.deferral_allowed) { return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 360635a8..d958c8ca 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1573,11 +1573,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); - CONF_HANDLE_UINT64_T( - opt_hpa_opts.peak_demand_window_ms, - "hpa_peak_demand_window_ms", 0, 0, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/peak_demand.c b/src/peak_demand.c deleted file mode 100644 index 49f28930..00000000 --- a/src/peak_demand.c +++ /dev/null @@ -1,74 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/peak_demand.h" - -void -peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) { - assert(interval_ms > 0); - peak_demand->epoch = 0; - uint64_t interval_ns = interval_ms * 1000 * 1000; - peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS; - 
memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max)); -} - -static uint64_t -peak_demand_epoch_ind(peak_demand_t *peak_demand) { - return peak_demand->epoch % PEAK_DEMAND_NBUCKETS; -} - -static nstime_t -peak_demand_next_epoch_advance(peak_demand_t *peak_demand) { - uint64_t epoch = peak_demand->epoch; - uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns; - nstime_t next; - nstime_init(&next, ns); - return next; -} - -static uint64_t -peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand, - const nstime_t *now) { - nstime_t next_epoch_advance = - peak_demand_next_epoch_advance(peak_demand); - if (nstime_compare(now, &next_epoch_advance) < 0) { - return peak_demand_epoch_ind(peak_demand); - } - uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns; - assert(next_epoch > peak_demand->epoch); - /* - * If we missed more epochs, than capacity of circular buffer - * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS - * items as we don't want to zero out same item multiple times. 
- */ - if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) { - peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS; - } - while (peak_demand->epoch < next_epoch) { - ++peak_demand->epoch; - uint64_t ind = peak_demand_epoch_ind(peak_demand); - peak_demand->nactive_max[ind] = 0; - } - return peak_demand_epoch_ind(peak_demand); -} - -void -peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, - size_t nactive) { - uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now); - size_t *epoch_nactive = &peak_demand->nactive_max[ind]; - if (nactive > *epoch_nactive) { - *epoch_nactive = nactive; - } -} - -size_t -peak_demand_nactive_max(peak_demand_t *peak_demand) { - size_t nactive_max = peak_demand->nactive_max[0]; - for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) { - if (peak_demand->nactive_max[i] > nactive_max) { - nactive_max = peak_demand->nactive_max[i]; - } - } - return nactive_max; -} diff --git a/src/stats.c b/src/stats.c index d3127483..8496e457 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1657,7 +1657,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") - OPT_WRITE_UINT64("hpa_peak_demand_window_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index e53ee2ec..47fa25f2 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,9 +37,26 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 +}; + +static hpa_shard_opts_t test_hpa_shard_opts_purge = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + 
/* experimental_max_purge_nhp */ + -1 }; static hpa_shard_t * @@ -474,14 +491,8 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.slab_max_alloc = HUGEPAGE; - opts.hugification_threshold = 0.9 * HUGEPAGE; - opts.dirty_mult = FXP_INIT_PERCENT(11); - opts.deferral_allowed = true; - opts.hugify_delay_ms = 0; - - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_purge); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -489,7 +500,8 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. */ - const size_t npages = opts.hugification_threshold / PAGE + 1; + const size_t npages = + test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; const size_t size = npages * PAGE; bool deferred_work_generated = false; @@ -736,142 +748,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } TEST_END -TEST_BEGIN(test_demand_purge_slack) { - test_skip_if(!hpa_supported()); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - /* Allow 10% of slack. */ - opts.dirty_mult = FXP_INIT_PERCENT(10); - /* Peak demand sliding window duration is 10 seconds. 
*/ - opts.peak_demand_window_ms = 10 * 1000; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 16 * HUGEPAGE_PAGES}; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - - /* Deallocate 5 hugepages out of 16. */ - for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); - } - nstime_init2(&defer_curtime, 6, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * Peak demand within sliding window is 16 hugepages, so we don't need - * to purge anything just yet. - */ - expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); - - nstime_init2(&defer_curtime, 12, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); - ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * 12 seconds passed now, peak demand is 11 hugepages, we allowed to - * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we - * have 5 dirty hugepages, so we should purge 4 of them. 
- */ - expect_zu_eq(4, ndefer_purge_calls, "Expect purges"); - ndefer_purge_calls = 0; - - destroy_test_data(shard); -} -TEST_END - -TEST_BEGIN(test_demand_purge_tight) { - test_skip_if(!hpa_supported()); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - /* No slack allowed. */ - opts.dirty_mult = FXP_INIT_PERCENT(0); - /* Peak demand sliding window duration is 10 seconds. */ - opts.peak_demand_window_ms = 10 * 1000; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 16 * HUGEPAGE_PAGES}; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - - /* Deallocate 5 hugepages out of 16. */ - for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); - } - nstime_init2(&defer_curtime, 6, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * Peak demand within sliding window is 16 hugepages, to purge anything - * just yet. 
- */ - expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); - - nstime_init2(&defer_curtime, 12, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); - ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * 12 seconds passed now, peak demand is 11 hugepages. We have - * hpa_dirty_mult = 0, so we allowed to keep 11 * 0 = 0 dirty - * hugepages, but we have 5, all of them should be purged. - */ - expect_zu_eq(5, ndefer_purge_calls, "Expect purges"); - ndefer_purge_calls = 0; - - destroy_test_data(shard); -} -TEST_END - TEST_BEGIN(test_vectorized_opt_eq_zero) { test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); @@ -934,7 +810,5 @@ main(void) { test_min_purge_interval, test_purge, test_experimental_max_purge_nhp, - test_demand_purge_slack, - test_demand_purge_tight, test_vectorized_opt_eq_zero); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cf9b88aa..68c3a705 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -295,7 +295,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); - TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c deleted file mode 100644 index ca2506b8..00000000 --- a/test/unit/peak_demand.c +++ /dev/null @@ -1,162 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/peak_demand.h" - -TEST_BEGIN(test_peak_demand_init) { - peak_demand_t peak_demand; - /* - * Exact value doesn't matter here as we don't advance epoch in this - * test. 
- */ - uint64_t interval_ms = 1000; - peak_demand_init(&peak_demand, interval_ms); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0, - "Unexpected ndirty_max value after initialization"); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_basic) { - peak_demand_t peak_demand; - /* Make each bucket exactly one second to simplify math. */ - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 1, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_skip_epochs) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1), - /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - /* - * Updates are not evenly spread over time. When we update at - * 2 * (PEAK_DEMAND_NBUCKETS - 1) second, 1024 value is already out of - * sliding window, but 512 is still present. 
- */ - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_rewrite_optimization) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX); - /* - * This update should take reasonable time if optimization is working - * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test - * will take a long time to finish. - */ - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_out_of_interval) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - /* - * Updates frequency is lower than tracking interval, so we should - * have only last value. - */ - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_static_epoch) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - nstime_init_zero(&now); - - /* Big enough value to overwrite values in circular buffer. 
*/ - size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; - for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { - /* - * We should override value in the same bucket as now value - * doesn't change between iterations. - */ - peak_demand_update(&peak_demand, &now, nactive); - } - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_epoch_advance) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - /* Big enough value to overwrite values in circular buffer. */ - size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; - for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { - uint64_t sec = nactive; - nstime_init2(&now, sec, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, nactive); - } - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); -} -TEST_END - -int -main(void) { - return test_no_reentrancy( - test_peak_demand_init, - test_peak_demand_update_basic, - test_peak_demand_update_skip_epochs, - test_peak_demand_update_rewrite_optimization, - test_peak_demand_update_out_of_interval, - test_peak_demand_update_static_epoch, - test_peak_demand_update_epoch_advance); -} From 1972241cd204c60fb5b66f23c48a117879636161 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 2 Jun 2025 11:24:55 -0700 Subject: [PATCH 298/395] Remove unused options in the batched madvise unit tests. 
--- test/unit/hpa_vectorized_madvise.c | 4 +--- test/unit/hpa_vectorized_madvise_large_batch.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index ae25fdde..6770a9fa 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -37,9 +37,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 }; static hpa_shard_t * diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 99ce15f4..561da7a2 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -37,9 +37,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 }; static hpa_shard_t * From e6864c6075a9fdeea56f788588652f2cefb996b6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 14 Mar 2025 06:34:05 -0700 Subject: [PATCH 299/395] [thread_event] Remove macros from thread_event and replace with dynamic event objects --- Makefile.in | 1 + include/jemalloc/internal/peak_event.h | 17 +- include/jemalloc/internal/prof_externs.h | 63 +++- include/jemalloc/internal/prof_threshold.h | 5 +- include/jemalloc/internal/stats.h | 5 +- include/jemalloc/internal/tcache_externs.h | 3 + include/jemalloc/internal/thread_event.h | 84 +---- .../jemalloc/internal/thread_event_registry.h | 58 ++++ include/jemalloc/internal/tsd_internals.h | 19 +- src/peak_event.c | 44 +-- src/prof.c | 54 ++- src/prof_threshold.c | 14 +- src/stats.c | 22 +- src/tcache.c | 27 +- src/thread_event.c | 311 ++++++++++-------- src/thread_event_registry.c | 37 +++ test/unit/thread_event.c | 9 +- 17 files changed, 455 insertions(+), 318 deletions(-) create mode 100644 
include/jemalloc/internal/thread_event_registry.h create mode 100644 src/thread_event_registry.c diff --git a/Makefile.in b/Makefile.in index 7085a22a..2519ed83 100644 --- a/Makefile.in +++ b/Makefile.in @@ -156,6 +156,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ + $(srcroot)src/thread_event_registry.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/util.c \ diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index cc2a1401..1e339ff8 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -4,6 +4,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_types.h" +/* + * Update every 64K by default. We're not exposing this as a configuration + * option for now; we don't want to bind ourselves too tightly to any particular + * performance requirements for small values, or guarantee that we'll even be + * able to provide fine-grained accuracy. + */ +#define PEAK_EVENT_WAIT (64 * 1024) + /* * While peak.h contains the simple helper struct that tracks state, this * contains the allocator tie-ins (and knows about tsd, the event module, etc.). @@ -15,13 +23,6 @@ void peak_event_update(tsd_t *tsd); void peak_event_zero(tsd_t *tsd); uint64_t peak_event_max(tsd_t *tsd); -/* Manual hooks. */ -/* The activity-triggered hooks. 
*/ -uint64_t peak_alloc_new_event_wait(tsd_t *tsd); -uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd); -void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed); -uint64_t peak_dalloc_new_event_wait(tsd_t *tsd); -uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd); -void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t peak_te_handler; #endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 789e3811..7d962522 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/thread_event_registry.h" extern bool opt_prof; extern bool opt_prof_active; @@ -104,9 +105,65 @@ void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -/* Only accessed by thread event. */ uint64_t prof_sample_new_event_wait(tsd_t *tsd); -uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); -void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); +uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); + +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. 
+ * + * Surplus is a terminology referring to the amount of bytes beyond what's + * needed for triggering an event, which can be a useful quantity to have in + * general when lookahead is being called. + * + * This function returns true if allocation of usize would go above the next + * trigger for prof event, and false otherwise. + * If function returns true surplus will contain number of bytes beyond that + * trigger. + */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, + size_t *surplus) { + if (surplus != NULL) { + /* + * This is a dead store: the surplus will be overwritten before + * any read. The initialization suppresses compiler warnings. + * Meanwhile, using SIZE_MAX to initialize is good for + * debugging purpose, because a valid surplus value is strictly + * less than usize, which is at most SIZE_MAX. + */ + *surplus = SIZE_MAX; + } + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd); + uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); + if (accumbytes < sample_wait) { + return false; + } + assert(accumbytes - sample_wait < (uint64_t)usize); + if (surplus != NULL) { + *surplus = (size_t)(accumbytes - sample_wait); + } + return true; +} + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); +} + +extern te_base_cb_t prof_sample_te_handler; #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h index dc9c8f2b..93e9478e 100644 --- a/include/jemalloc/internal/prof_threshold.h +++ b/include/jemalloc/internal/prof_threshold.h @@ -3,9 +3,6 @@ #include "jemalloc/internal/tsd_types.h" -/* The activity-triggered hooks. */ -uint64_t prof_threshold_new_event_wait(tsd_t *tsd); -uint64_t prof_threshold_postponed_event_wait(tsd_t *tsd); -void prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t prof_threshold_te_handler; #endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 310178ea..a5f1be32 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" /* OPTION(opt, var_name, default, set_value_to) */ @@ -43,9 +44,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; #define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) /* Only accessed by thread event. 
*/ -uint64_t stats_interval_new_event_wait(tsd_t *tsd); -uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); -void stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t stats_interval_te_handler; /* Implements je_malloc_stats_print. */ void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 732adacb..024314fe 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/thread_event_registry.h" extern bool opt_tcache; extern size_t opt_tcache_max; @@ -89,4 +90,6 @@ uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t tcache_gc_te_handler; + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index ad46ffe7..e9631cbd 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -49,29 +49,12 @@ void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); -/* - * List of all events, in the following format: - * E(event, (condition), is_alloc_event) - */ -#define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ - E(prof_sample, (config_prof && opt_prof), true) \ - E(prof_threshold, config_stats, true) \ - E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ - E(peak_alloc, config_stats, true) \ - E(peak_dalloc, config_stats, false) - -#define E(event, condition_unused, 
is_alloc_event_unused) \ - C(event##_event_wait) - /* List of all thread event counters. */ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ - C(thread_allocated_last_event) \ - ITERATE_OVER_ALL_EVENTS \ - C(prof_sample_last_event) \ - C(stats_interval_last_event) +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_last_event) \ + C(prof_sample_last_event) \ + C(stats_interval_last_event) /* Getters directly wrap TSD getters. */ #define C(counter) \ @@ -99,12 +82,6 @@ counter##_set(tsd_t *tsd, uint64_t v) { \ ITERATE_OVER_ALL_COUNTERS #undef C -/* - * For generating _event_wait getter / setter functions for each individual - * event. - */ -#undef E - /* * The malloc and free fastpath getters -- use the unsafe getters since tsd may * be non-nominal, in which case the fast_threshold will be set to 0. This @@ -221,57 +198,6 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered. The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - * - * Surplus is a terminology referring to the amount of bytes beyond what's - * needed for triggering an event, which can be a useful quantity to have in - * general when lookahead is being called. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, - size_t *surplus) { - if (surplus != NULL) { - /* - * This is a dead store: the surplus will be overwritten before - * any read. The initialization suppresses compiler warnings. 
- * Meanwhile, using SIZE_MAX to initialize is good for - * debugging purpose, because a valid surplus value is strictly - * less than usize, which is at most SIZE_MAX. - */ - *surplus = SIZE_MAX; - } - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); - if (accumbytes < sample_wait) { - return false; - } - assert(accumbytes - sample_wait < (uint64_t)usize); - if (surplus != NULL) { - *surplus = (size_t)(accumbytes - sample_wait); - } - return true; -} - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); -} - JEMALLOC_ALWAYS_INLINE void te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { te_assert_invariants(tsd); diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h new file mode 100644 index 00000000..aee7a4f2 --- /dev/null +++ b/include/jemalloc/internal/thread_event_registry.h @@ -0,0 +1,58 @@ +#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H +#define JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd.h" + +/* "te" is short for "thread_event" */ +enum te_alloc_e { +#ifdef JEMALLOC_PROF + te_alloc_prof_sample, +#endif + te_alloc_stats_interval, +#ifdef JEMALLOC_STATS + te_alloc_prof_threshold, +#endif + te_alloc_tcache_gc, +#ifdef JEMALLOC_STATS + te_alloc_peak, + te_alloc_last = te_alloc_peak, +#else + te_alloc_last = te_alloc_tcache_gc, +#endif + te_alloc_count = te_alloc_last + 1 +}; +typedef enum te_alloc_e te_alloc_t; + +enum te_dalloc_e { + te_dalloc_tcache_gc, +#ifdef JEMALLOC_STATS + te_dalloc_peak, + te_dalloc_last = te_dalloc_peak, 
+#else + te_dalloc_last = te_dalloc_tcache_gc, +#endif + te_dalloc_count = te_dalloc_last + 1 +}; +typedef enum te_dalloc_e te_dalloc_t; + +/* These will live in tsd */ +typedef struct te_data_s te_data_t; +struct te_data_s { + uint64_t alloc_wait[te_alloc_count]; + uint64_t dalloc_wait[te_dalloc_count]; +}; +#define TE_DATA_INITIALIZER { {0}, {0} } + +typedef struct te_base_cb_s te_base_cb_t; +struct te_base_cb_s { + bool (*enabled)(void); + uint64_t (*new_event_wait)(tsd_t *tsd); + uint64_t (*postponed_event_wait)(tsd_t *tsd); + void (*event_handler)(tsd_t *tsd); +}; + +extern te_base_cb_t *te_alloc_handlers[te_alloc_count]; +extern te_base_cb_t *te_dalloc_handlers[te_dalloc_count]; + +#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 0ed33234..69b60519 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -15,6 +15,7 @@ #include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" @@ -68,15 +69,9 @@ typedef ql_elm(tsd_t) tsd_link_t; O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(tcache_gc_event_wait, uint64_t, uint64_t) \ - O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(te_data, te_data_t, te_data_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ - O(prof_threshold_event_wait, uint64_t, uint64_t) \ - O(stats_interval_event_wait, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(peak_alloc_event_wait, uint64_t, uint64_t) \ - O(peak_dalloc_event_wait, uint64_t, 
uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(san_extents_until_guard_small, uint64_t, uint64_t) \ @@ -102,15 +97,9 @@ typedef ql_elm(tsd_t) tsd_link_t; /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ /* thread_deallocated_next_event */ 0, \ - /* tcache_gc_event_wait */ 0, \ - /* tcache_gc_dalloc_event_wait */ 0, \ - /* prof_sample_event_wait */ 0, \ + /* te_data */ TE_DATA_INITIALIZER, \ /* prof_sample_last_event */ 0, \ - /* prof_threshold_event_wait */ 0, \ - /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ - /* peak_alloc_event_wait */ 0, \ - /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* san_extents_until_guard_small */ 0, \ diff --git a/src/peak_event.c b/src/peak_event.c index 4093fbcc..430bfdea 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -5,14 +5,7 @@ #include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/peak.h" - -/* - * Update every 64K by default. We're not exposing this as a configuration - * option for now; we don't want to bind ourselves too tightly to any particular - * performance requirements for small values, or guarantee that we'll even be - * able to provide fine-grained accuracy. - */ -#define PEAK_EVENT_WAIT (64 * 1024) +#include "jemalloc/internal/thread_event_registry.h" /* Update the peak with current tsd state. 
*/ void @@ -49,34 +42,31 @@ peak_event_max(tsd_t *tsd) { return peak_max(peak); } -uint64_t -peak_alloc_new_event_wait(tsd_t *tsd) { +static uint64_t +peak_event_new_event_wait(tsd_t *tsd) { return PEAK_EVENT_WAIT; } -uint64_t -peak_alloc_postponed_event_wait(tsd_t *tsd) { +static uint64_t +peak_event_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -void -peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { +static void +peak_event_handler(tsd_t *tsd) { peak_event_update(tsd); peak_event_activity_callback(tsd); } -uint64_t -peak_dalloc_new_event_wait(tsd_t *tsd) { - return PEAK_EVENT_WAIT; +static bool +peak_event_enabled(void) { + return config_stats; } -uint64_t -peak_dalloc_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - -void -peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { - peak_event_update(tsd); - peak_event_activity_callback(tsd); -} +/* Handles alloc and dalloc */ +te_base_cb_t peak_te_handler = { + .enabled = &peak_event_enabled, + .new_event_wait = &peak_event_new_event_wait, + .postponed_event_wait = &peak_event_postponed_event_wait, + .event_handler = &peak_event_handler, +}; diff --git a/src/prof.c b/src/prof.c index 8fdc6f71..94eddb6d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -12,6 +12,7 @@ #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/thread_event_registry.h" /* * This file implements the profiling "APIs" needed by other parts of jemalloc, @@ -289,8 +290,40 @@ prof_sample_new_event_wait(tsd_t *tsd) { #endif } +void +prof_sample_event_handler(tsd_t *tsd) { + cassert(config_prof); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = tsd_prof_sample_last_event_get(tsd); + tsd_prof_sample_last_event_set(tsd, last_event); + uint64_t elapsed = last_event - last_sample_event; + 
assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { + prof_idump(tsd_tsdn(tsd)); + } +} + +static bool +prof_sample_enabled(void) { + return config_prof && opt_prof; +} + uint64_t -prof_sample_postponed_event_wait(tsd_t *tsd) { +tsd_prof_sample_event_wait_get(tsd_t *tsd) { +#ifdef JEMALLOC_PROF + return tsd_te_datap_get_unsafe(tsd)->alloc_wait[te_alloc_prof_sample]; +#else + not_reached(); + return TE_MAX_START_WAIT; +#endif +} + +te_base_cb_t prof_sample_te_handler = { + .enabled = &prof_sample_enabled, + .new_event_wait = &prof_sample_new_event_wait, /* * The postponed wait time for prof sample event is computed as if we * want a new wait time (i.e. as if the event were triggered). If we @@ -298,21 +331,10 @@ prof_sample_postponed_event_wait(tsd_t *tsd) { * handling the other events, then we can have sampling bias, if e.g. * the allocation immediately following a reentrancy always comes from * the same stack trace. 
- */ - return prof_sample_new_event_wait(tsd); -} - -void -prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { - cassert(config_prof); - assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return; - } - if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { - prof_idump(tsd_tsdn(tsd)); - } -} + */ + .postponed_event_wait = &prof_sample_new_event_wait, + .event_handler = &prof_sample_event_handler, +}; static void prof_fdump(void) { diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 28a525fc..516b0bf6 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -52,6 +52,18 @@ prof_threshold_postponed_event_wait(tsd_t *tsd) { } void -prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed) { +prof_threshold_event_handler(tsd_t *tsd) { prof_threshold_update(tsd); } + +static bool +prof_threshold_enabled(void) { + return config_stats; +} + +te_base_cb_t prof_threshold_te_handler = { + .enabled = &prof_threshold_enabled, + .new_event_wait = &prof_threshold_new_event_wait, + .postponed_event_wait = &prof_threshold_postponed_event_wait, + .event_handler = &prof_threshold_event_handler, +}; diff --git a/src/stats.c b/src/stats.c index 8496e457..efc73223 100644 --- a/src/stats.c +++ b/src/stats.c @@ -65,7 +65,7 @@ char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; static counter_accum_t stats_interval_accumulated; /* Per thread batch accum size for stats_interval. 
*/ -static uint64_t stats_interval_accum_batch; +uint64_t stats_interval_accum_batch; /******************************************************************************/ @@ -2128,7 +2128,12 @@ stats_interval_postponed_event_wait(tsd_t *tsd) { } void -stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { +stats_interval_event_handler(tsd_t *tsd) { + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = tsd_stats_interval_last_event_get(tsd); + tsd_stats_interval_last_event_set(tsd, last_event); + uint64_t elapsed = last_event - last_sample_event; + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, elapsed)) { @@ -2136,6 +2141,19 @@ stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { } } +static bool +stats_interval_enabled(void) { + return opt_stats_interval >= 0; +} + +te_base_cb_t stats_interval_te_handler = { + .enabled = &stats_interval_enabled, + .new_event_wait = &stats_interval_new_event_wait, + .postponed_event_wait = &stats_interval_postponed_event_wait, + .event_handler = &stats_interval_event_handler, +}; + + bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index 270d38ac..36af7d97 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -511,7 +511,7 @@ tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, } static void -tcache_event(tsd_t *tsd) { +tcache_gc_event(tsd_t *tsd) { tcache_t *tcache = tcache_get(tsd); if (tcache == NULL) { return; @@ -581,18 +581,6 @@ tcache_event(tsd_t *tsd) { tcache_slow->next_gc_bin_large = szind_large; } -void -tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) { - assert(elapsed == TE_INVALID_ELAPSED); - tcache_event(tsd); -} - -void -tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { - assert(elapsed == TE_INVALID_ELAPSED); - tcache_event(tsd); -} - void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t 
*cache_bin, szind_t binind, @@ -1912,3 +1900,16 @@ tcache_postfork_child(tsdn_t *tsdn) { void tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } + +static bool +tcache_gc_enabled(void) { + return (opt_tcache_gc_incr_bytes > 0); +} + +/* Handles alloc and dalloc the same way */ +te_base_cb_t tcache_gc_te_handler = { + .enabled = &tcache_gc_enabled, + .new_event_wait = &tcache_gc_new_event_wait, + .postponed_event_wait = &tcache_gc_postponed_event_wait, + .event_handler = &tcache_gc_event, +}; diff --git a/src/thread_event.c b/src/thread_event.c index a8276cd7..0b1adcc1 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -2,108 +2,46 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/thread_event.h" - -/* - * Signatures for event specific functions. These functions should be defined - * by the modules owning each event. The signatures here verify that the - * definitions follow the right format. - * - * The first two are functions computing new / postponed event wait time. New - * event wait time is the time till the next event if an event is currently - * being triggered; postponed event wait time is the time till the next event - * if an event should be triggered but needs to be postponed, e.g. when the TSD - * is not nominal or during reentrancy. - * - * The third is the event handler function, which is called whenever an event - * is triggered. The parameter is the elapsed time since the last time an - * event of the same type was triggered. - */ -#define E(event, condition_unused, is_alloc_event_unused) \ -uint64_t event##_new_event_wait(tsd_t *tsd); \ -uint64_t event##_postponed_event_wait(tsd_t *tsd); \ -void event##_event_handler(tsd_t *tsd, uint64_t elapsed); - -ITERATE_OVER_ALL_EVENTS -#undef E - -/* Signatures for internal functions fetching elapsed time. 
*/ -#define E(event, condition_unused, is_alloc_event_unused) \ -static uint64_t event##_fetch_elapsed(tsd_t *tsd); - -ITERATE_OVER_ALL_EVENTS -#undef E - -static uint64_t -tcache_gc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -prof_sample_fetch_elapsed(tsd_t *tsd) { - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t last_sample_event = prof_sample_last_event_get(tsd); - prof_sample_last_event_set(tsd, last_event); - return last_event - last_sample_event; -} - -static uint64_t -stats_interval_fetch_elapsed(tsd_t *tsd) { - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t last_stats_event = stats_interval_last_event_get(tsd); - stats_interval_last_event_set(tsd, last_event); - return last_event - last_stats_event; -} - -static uint64_t -peak_alloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -peak_dalloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -prof_threshold_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -/* Per event facilities done. */ +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/peak_event.h" static bool te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); -#define E(event, condition, alloc_event) \ - if (condition && alloc_event == ctx->is_alloc) { \ - return true; \ + if (ctx->is_alloc) { + for (int i = 0; i < te_alloc_count; ++i) { + if (te_alloc_handlers[i]->enabled()) { + return true; + } + } + } else { + for (int i = 0; i < te_dalloc_count; ++i) { + if (te_dalloc_handlers[i]->enabled()) { + return true; + } + } } - ITERATE_OVER_ALL_EVENTS -#undef E return false; } static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? 
tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? te_alloc_count : te_dalloc_count; + uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = \ - event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > 0U && event_wait < wait) { \ - wait = event_wait; \ - } \ + + for (int i = 0; i < count; i++) { + if (handlers[i]->enabled()) { + uint64_t ev_wait = waits[i]; + assert(ev_wait <= TE_MAX_START_WAIT); + if (ev_wait > 0U && ev_wait < wait) { + wait = ev_wait; + } + } } - ITERATE_OVER_ALL_EVENTS -#undef E - assert(wait <= TE_MAX_START_WAIT); return wait; } @@ -238,18 +176,132 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, te_ctx_next_event_set(tsd, ctx, next_event); } -static uint64_t -te_clip_event_wait(uint64_t event_wait) { - assert(event_wait > 0U); - if (TE_MIN_START_WAIT > 1U && - unlikely(event_wait < TE_MIN_START_WAIT)) { - event_wait = TE_MIN_START_WAIT; +static void +te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? 
te_alloc_count : te_dalloc_count; + for (int i = 0; i < count; i++) { + if (handlers[i]->enabled()) { + uint64_t ev_wait = handlers[i]->new_event_wait(tsd); + assert(ev_wait > 0); + waits[i] = ev_wait; + if (ev_wait < *wait) { + *wait = ev_wait; + } + } } - if (TE_MAX_START_WAIT < UINT64_MAX && - unlikely(event_wait > TE_MAX_START_WAIT)) { - event_wait = TE_MAX_START_WAIT; +} + +static inline bool +te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, + uint64_t *ev_wait, uint64_t *wait, te_base_cb_t *handler, + uint64_t new_wait) { + bool ret = false; + if (*ev_wait > accumbytes) { + *ev_wait -= accumbytes; + } else if (!allow) { + *ev_wait = handler->postponed_event_wait(tsd); + } else { + ret = true; + *ev_wait = new_wait == 0 ? + handler->new_event_wait(tsd) : + new_wait; + } + + assert(*ev_wait > 0); + if (*ev_wait < *wait) { + *wait = *ev_wait; + } + return ret; +} + +extern uint64_t stats_interval_accum_batch; +/* Return number of handlers enqueued into to_trigger array */ +static inline size_t +te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, + uint64_t accumbytes, bool allow, uint64_t *wait) { + /* + * We do not loop and invoke the functions via interface because + * of the perf cost. This path is relatively hot, so we sacrifice + * elegance for perf. 
+ */ + size_t nto_trigger = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; + if (opt_tcache_gc_incr_bytes > 0) { + assert(te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_tcache_gc], wait, + te_alloc_handlers[te_alloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_tcache_gc]; + } } - return event_wait; +#ifdef JEMALLOC_PROF + if (opt_prof) { + assert(te_alloc_handlers[te_alloc_prof_sample]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_sample], wait, + te_alloc_handlers[te_alloc_prof_sample], 0)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_prof_sample]; + } + } +#endif + if (opt_stats_interval >= 0) { + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_stats_interval], + wait, + te_alloc_handlers[te_alloc_stats_interval], + stats_interval_accum_batch)) { + assert(te_alloc_handlers[te_alloc_stats_interval]->enabled()); + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_stats_interval]; + } + } + +#ifdef JEMALLOC_STATS + assert(te_alloc_handlers[te_alloc_peak]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, + te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { + to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; + } + + assert(te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_threshold], wait, + te_alloc_handlers[te_alloc_prof_threshold], + 1 << opt_experimental_lg_prof_threshold)) { + to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; + } +#endif + return nto_trigger; +} + +static inline size_t +te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbytes, + bool allow, uint64_t *wait) { + size_t nto_trigger = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + if 
(opt_tcache_gc_incr_bytes > 0) { + assert(te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_dalloc_tcache_gc], wait, + te_dalloc_handlers[te_dalloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { + to_trigger[nto_trigger++] = + te_dalloc_handlers[te_dalloc_tcache_gc]; + } + } +#ifdef JEMALLOC_STATS + assert(te_dalloc_handlers[te_dalloc_peak]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, + te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { + to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; + } +#endif + return nto_trigger; } void @@ -263,47 +315,32 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { bool allow_event_trigger = tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; - bool is_alloc = ctx->is_alloc; uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - bool is_##event##_triggered = false; \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > accumbytes) { \ - event_wait -= accumbytes; \ - } else if (!allow_event_trigger) { \ - event_wait = event##_postponed_event_wait(tsd); \ - } else { \ - is_##event##_triggered = true; \ - event_wait = event##_new_event_wait(tsd); \ - } \ - event_wait = te_clip_event_wait(event_wait); \ - event##_event_wait_set(tsd, event_wait); \ - if (event_wait < wait) { \ - wait = event_wait; \ - } \ + assert((int)te_alloc_count >= (int) te_dalloc_count); + te_base_cb_t *to_trigger[te_alloc_count]; + size_t nto_trigger; + if (ctx->is_alloc) { + nto_trigger = te_update_alloc_events(tsd, to_trigger, + accumbytes, + allow_event_trigger, + &wait); + } else { + nto_trigger = te_update_dalloc_events(tsd, to_trigger, + accumbytes, + allow_event_trigger, + &wait); } - ITERATE_OVER_ALL_EVENTS -#undef E - - assert(wait <= TE_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); 
te_adjust_thresholds_helper(tsd, ctx, wait); te_assert_invariants(tsd); -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition && \ - is_##event##_triggered) { \ - assert(allow_event_trigger); \ - uint64_t elapsed = event##_fetch_elapsed(tsd); \ - event##_event_handler(tsd, elapsed); \ + for (size_t i = 0; i < nto_trigger; i++) { + assert(allow_event_trigger); + to_trigger[i]->event_handler(tsd); } - ITERATE_OVER_ALL_EVENTS -#undef E - te_assert_invariants(tsd); } @@ -323,18 +360,8 @@ te_init(tsd_t *tsd, bool is_alloc) { te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx)); uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = event##_new_event_wait(tsd); \ - event_wait = te_clip_event_wait(event_wait); \ - event##_event_wait_set(tsd, event_wait); \ - if (event_wait < wait) { \ - wait = event_wait; \ - } \ - } + te_init_waits(tsd, &wait, is_alloc); - ITERATE_OVER_ALL_EVENTS -#undef E te_adjust_thresholds_helper(tsd, &ctx, wait); } diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c new file mode 100644 index 00000000..7543cfda --- /dev/null +++ b/src/thread_event_registry.c @@ -0,0 +1,37 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_threshold.h" +#include "jemalloc/internal/stats.h" + + +/* Table of all the thread events. + * Events share interface, but internally they will know thier + * data layout in tsd. 
+ */ +te_base_cb_t *te_alloc_handlers[te_alloc_count] = { +#ifdef JEMALLOC_PROF + &prof_sample_te_handler, +#endif + &stats_interval_te_handler, +#ifdef JEMALLOC_STATS + &prof_threshold_te_handler, +#endif + &tcache_gc_te_handler, +#ifdef JEMALLOC_STATS + &peak_te_handler, +#endif +}; + +te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { + &tcache_gc_te_handler, +#ifdef JEMALLOC_STATS + &peak_te_handler, +#endif +}; diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index e0b88a92..8b4fb1d6 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -8,12 +8,11 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_last_event_set(&ctx, 0); te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); -#define E(event, condition, is_alloc) \ - if (is_alloc && condition) { \ - event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \ + + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; + for (size_t i = 0; i < te_alloc_count; i++) { + waits[i] = TE_NEXT_EVENT_FAST_MAX; } - ITERATE_OVER_ALL_EVENTS -#undef E /* Test next_event_fast rolling back to 0. 
*/ void *p = malloc(16U); From 015b017973d47f3047f8f4d7349c937fefd30f99 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 28 Mar 2025 07:35:53 -0700 Subject: [PATCH 300/395] [thread_event] Add support for user events in thread events when stats are enabled --- include/jemalloc/internal/tcache_externs.h | 8 - include/jemalloc/internal/thread_event.h | 6 +- .../jemalloc/internal/thread_event_registry.h | 96 ++++++-- include/jemalloc/internal/witness.h | 1 + src/ctl.c | 19 ++ src/jemalloc.c | 1 + src/peak_event.c | 4 +- src/prof.c | 10 +- src/prof_threshold.c | 6 +- src/stats.c | 5 +- src/tcache.c | 4 +- src/thread_event.c | 98 ++++++-- src/thread_event_registry.c | 233 +++++++++++++++++- test/unit/mallctl.c | 40 ++- test/unit/thread_event.c | 20 ++ 15 files changed, 470 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 024314fe..76d601c3 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -82,14 +82,6 @@ void tcache_enabled_set(tsd_t *tsd, bool enabled); void tcache_assert_initialized(tcache_t *tcache); -/* Only accessed by thread event. 
*/ -uint64_t tcache_gc_new_event_wait(tsd_t *tsd); -uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); -void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed); -uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); -uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); -void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); - extern te_base_cb_t tcache_gc_te_handler; #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index e9631cbd..bf9ca3cc 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -48,10 +48,12 @@ void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); +void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, + uint64_t wait); /* List of all thread event counters. */ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ C(thread_allocated_last_event) \ C(prof_sample_last_event) \ C(stats_interval_last_event) diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index aee7a4f2..1957e727 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -2,37 +2,41 @@ #define JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/tsd_types.h" + +#define TE_MAX_USER_EVENTS 4 /* "te" is short for "thread_event" */ enum te_alloc_e { #ifdef JEMALLOC_PROF - te_alloc_prof_sample, + te_alloc_prof_sample, #endif - te_alloc_stats_interval, + te_alloc_stats_interval, + te_alloc_tcache_gc, #ifdef JEMALLOC_STATS - te_alloc_prof_threshold, + te_alloc_prof_threshold, + te_alloc_peak, #endif - 
te_alloc_tcache_gc, -#ifdef JEMALLOC_STATS - te_alloc_peak, - te_alloc_last = te_alloc_peak, -#else - te_alloc_last = te_alloc_tcache_gc, -#endif - te_alloc_count = te_alloc_last + 1 + te_alloc_user0, + te_alloc_user1, + te_alloc_user2, + te_alloc_user3, + te_alloc_last = te_alloc_user3, + te_alloc_count = te_alloc_last + 1 }; typedef enum te_alloc_e te_alloc_t; enum te_dalloc_e { - te_dalloc_tcache_gc, + te_dalloc_tcache_gc, #ifdef JEMALLOC_STATS - te_dalloc_peak, - te_dalloc_last = te_dalloc_peak, -#else - te_dalloc_last = te_dalloc_tcache_gc, + te_dalloc_peak, #endif - te_dalloc_count = te_dalloc_last + 1 + te_dalloc_user0, + te_dalloc_user1, + te_dalloc_user2, + te_dalloc_user3, + te_dalloc_last = te_dalloc_user3, + te_dalloc_count = te_dalloc_last + 1 }; typedef enum te_dalloc_e te_dalloc_t; @@ -42,17 +46,63 @@ struct te_data_s { uint64_t alloc_wait[te_alloc_count]; uint64_t dalloc_wait[te_dalloc_count]; }; -#define TE_DATA_INITIALIZER { {0}, {0} } +#define TE_DATA_INITIALIZER \ + { \ + {0}, { \ + 0 \ + } \ + } + +/* + * Check if user event is installed, installed and enabled, or not + * installed. 
+ * + */ +enum te_enabled_e { te_enabled_not_installed, te_enabled_yes, te_enabled_no }; +typedef enum te_enabled_e te_enabled_t; typedef struct te_base_cb_s te_base_cb_t; struct te_base_cb_s { - bool (*enabled)(void); - uint64_t (*new_event_wait)(tsd_t *tsd); - uint64_t (*postponed_event_wait)(tsd_t *tsd); - void (*event_handler)(tsd_t *tsd); + te_enabled_t (*enabled)(void); + uint64_t (*new_event_wait)(tsd_t *tsd); + uint64_t (*postponed_event_wait)(tsd_t *tsd); + void (*event_handler)(tsd_t *tsd); }; extern te_base_cb_t *te_alloc_handlers[te_alloc_count]; extern te_base_cb_t *te_dalloc_handlers[te_dalloc_count]; +bool experimental_thread_events_boot(void); + +/* + * User callback for thread events + * + * is_alloc - true if event is allocation, false if event is free + * tallocated - number of bytes allocated on current thread so far + * tdallocated - number of bytes allocated on current thread so far + */ +typedef void (*user_event_cb_t)( + bool is_alloc, uint64_t tallocated, uint64_t tdallocated); + +typedef struct user_hook_object_s user_hook_object_t; +struct user_hook_object_s { + user_event_cb_t callback; + uint64_t interval; + bool is_alloc_only; +}; + +/* + * register user callback + * + * return zero if event was registered + * + * if interval is zero or callback is NULL, or + * no more slots are available event will not be registered + * and non-zero value will be returned + * + */ +int te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj); + +te_enabled_t te_user_event_enabled(size_t ue_idx, bool is_alloc); + #endif /* JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index afee1246..acf7860d 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -78,6 +78,7 @@ enum witness_rank_e { WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = 
WITNESS_RANK_LEAF, + WITNESS_RANK_THREAD_EVENTS_USER = WITNESS_RANK_LEAF, }; typedef enum witness_rank_e witness_rank_t; diff --git a/src/ctl.c b/src/ctl.c index a30adc52..4f06363a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -362,6 +362,7 @@ CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_prof_threshold) +CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -976,6 +977,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, + {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3818,6 +3820,23 @@ label_return: return ret; } +static int +experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (newp == NULL) { + ret = EINVAL; + goto label_return; + } + + user_hook_object_t t_new = {NULL, 0, false}; + WRITE(t_new, user_hook_object_t); + ret = te_register_user_handler(tsd_tsdn(tsd), &t_new); + +label_return: + return ret; +} /* For integration test purpose only. No plan to move out of experimental. */ static int diff --git a/src/jemalloc.c b/src/jemalloc.c index d958c8ca..a4509e68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1965,6 +1965,7 @@ malloc_init_hard_a0_locked(void) { return true; } hook_boot(); + experimental_thread_events_boot(); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
diff --git a/src/peak_event.c b/src/peak_event.c index 430bfdea..e7f3ced6 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -58,9 +58,9 @@ peak_event_handler(tsd_t *tsd) { peak_event_activity_callback(tsd); } -static bool +static te_enabled_t peak_event_enabled(void) { - return config_stats; + return config_stats ? te_enabled_yes : te_enabled_no; } /* Handles alloc and dalloc */ diff --git a/src/prof.c b/src/prof.c index 94eddb6d..ec13afbd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -306,11 +306,6 @@ prof_sample_event_handler(tsd_t *tsd) { } } -static bool -prof_sample_enabled(void) { - return config_prof && opt_prof; -} - uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd) { #ifdef JEMALLOC_PROF @@ -321,6 +316,11 @@ tsd_prof_sample_event_wait_get(tsd_t *tsd) { #endif } +static te_enabled_t +prof_sample_enabled(void) { + return config_prof && opt_prof ? te_enabled_yes : te_enabled_no; +} + te_base_cb_t prof_sample_te_handler = { .enabled = &prof_sample_enabled, .new_event_wait = &prof_sample_new_event_wait, diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 516b0bf6..0b5cb53c 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -27,7 +27,7 @@ prof_threshold_hook_get(void) { } /* Invoke callback for threshold reached */ -static void +static inline void prof_threshold_update(tsd_t *tsd) { prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); if (prof_threshold_hook == NULL) { @@ -56,9 +56,9 @@ prof_threshold_event_handler(tsd_t *tsd) { prof_threshold_update(tsd); } -static bool +static te_enabled_t prof_threshold_enabled(void) { - return config_stats; + return config_stats ? 
te_enabled_yes : te_enabled_no; } te_base_cb_t prof_threshold_te_handler = { diff --git a/src/stats.c b/src/stats.c index efc73223..b2a00319 100644 --- a/src/stats.c +++ b/src/stats.c @@ -2141,9 +2141,9 @@ stats_interval_event_handler(tsd_t *tsd) { } } -static bool +static te_enabled_t stats_interval_enabled(void) { - return opt_stats_interval >= 0; + return opt_stats_interval >= 0 ? te_enabled_yes : te_enabled_no; } te_base_cb_t stats_interval_te_handler = { @@ -2153,7 +2153,6 @@ te_base_cb_t stats_interval_te_handler = { .event_handler = &stats_interval_event_handler, }; - bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index 36af7d97..0154403d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1901,9 +1901,9 @@ void tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } -static bool +static te_enabled_t tcache_gc_enabled(void) { - return (opt_tcache_gc_incr_bytes > 0); + return (opt_tcache_gc_incr_bytes > 0) ? te_enabled_yes : te_enabled_no; } /* Handles alloc and dalloc the same way */ diff --git a/src/thread_event.c b/src/thread_event.c index 0b1adcc1..496c16be 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -10,13 +10,13 @@ te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); if (ctx->is_alloc) { for (int i = 0; i < te_alloc_count; ++i) { - if (te_alloc_handlers[i]->enabled()) { + if (te_enabled_yes == te_alloc_handlers[i]->enabled()) { return true; } } } else { for (int i = 0; i < te_dalloc_count; ++i) { - if (te_dalloc_handlers[i]->enabled()) { + if (te_enabled_yes == te_dalloc_handlers[i]->enabled()) { return true; } } @@ -26,14 +26,17 @@ te_ctx_has_active_events(te_ctx_t *ctx) { static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? 
tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + te_base_cb_t **handlers = is_alloc ? + te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? + tsd_te_datap_get_unsafe(tsd)->alloc_wait : + tsd_te_datap_get_unsafe(tsd)->dalloc_wait; int count = is_alloc ? te_alloc_count : te_dalloc_count; - + uint64_t wait = TE_MAX_START_WAIT; for (int i = 0; i < count; i++) { - if (handlers[i]->enabled()) { + if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = waits[i]; assert(ev_wait <= TE_MAX_START_WAIT); if (ev_wait > 0U && ev_wait < wait) { @@ -41,7 +44,6 @@ te_next_event_compute(tsd_t *tsd, bool is_alloc) { } } } - return wait; } @@ -64,6 +66,19 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { /* The subtraction is intentionally susceptible to underflow. */ assert(current_bytes - last_event < interval); + + /* This computation assumes that event did not become active in the + * time since the last trigger. This works fine if waits for inactive + * events are initialized with 0 as those are ignored + * If we wanted to initialize user events to anything other than + * zero, computation would take it into account and min_wait could + * be smaller than interval (as it was not part of the calc setting + * next_event). 
+ * + * If we ever wanted to unregister the events assert would also + * need to account for the possibility that next_event was set, by + * event that is now gone + */ uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is @@ -161,8 +176,8 @@ te_recompute_fast_threshold(tsd_t *tsd) { } } -static void -te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, +static inline void +te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { /* * The next threshold based on future events can only be adjusted after @@ -175,14 +190,21 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); } +void +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, + uint64_t wait) { + te_adjust_thresholds_impl(tsd, ctx, wait); +} static void te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + uint64_t *waits = is_alloc ? + tsd_te_datap_get_unsafe(tsd)->alloc_wait : + tsd_te_datap_get_unsafe(tsd)->dalloc_wait; int count = is_alloc ? 
te_alloc_count : te_dalloc_count; for (int i = 0; i < count; i++) { - if (handlers[i]->enabled()) { + if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = handlers[i]->new_event_wait(tsd); assert(ev_wait > 0); waits[i] = ev_wait; @@ -229,7 +251,8 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_tcache_gc], wait, te_alloc_handlers[te_alloc_tcache_gc], @@ -240,7 +263,8 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, } #ifdef JEMALLOC_PROF if (opt_prof) { - assert(te_alloc_handlers[te_alloc_prof_sample]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_prof_sample]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_prof_sample], wait, te_alloc_handlers[te_alloc_prof_sample], 0)) { @@ -255,27 +279,44 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, wait, te_alloc_handlers[te_alloc_stats_interval], stats_interval_accum_batch)) { - assert(te_alloc_handlers[te_alloc_stats_interval]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_stats_interval]->enabled()); to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_stats_interval]; } } #ifdef JEMALLOC_STATS - assert(te_alloc_handlers[te_alloc_peak]->enabled()); + assert(te_enabled_yes == te_alloc_handlers[te_alloc_peak]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; } - assert(te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_prof_threshold]->enabled()); if(te_update_wait(tsd, 
accumbytes, allow, &waits[te_alloc_prof_threshold], wait, te_alloc_handlers[te_alloc_prof_threshold], 1 << opt_experimental_lg_prof_threshold)) { - to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_prof_threshold]; } #endif + + for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { + te_enabled_t status = + te_user_event_enabled(ue - te_alloc_user0, true); + if (status == te_enabled_not_installed) { + break; + } else if (status == te_enabled_yes) { + if (te_update_wait(tsd, accumbytes, allow, &waits[ue], + wait, te_alloc_handlers[ue], 0)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[ue]; + } + } + } return nto_trigger; } @@ -285,7 +326,8 @@ te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbyt size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + assert(te_enabled_yes == + te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_tcache_gc], wait, te_dalloc_handlers[te_dalloc_tcache_gc], @@ -295,12 +337,26 @@ te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbyt } } #ifdef JEMALLOC_STATS - assert(te_dalloc_handlers[te_dalloc_peak]->enabled()); + assert(te_enabled_yes == te_dalloc_handlers[te_dalloc_peak]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, - te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { + te_dalloc_handlers[te_dalloc_peak], + PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; } #endif + for (te_dalloc_t ue = te_dalloc_user0; ue <= te_dalloc_user3; ue++) { + te_enabled_t status = + te_user_event_enabled(ue - te_dalloc_user0, false); + if (status == te_enabled_not_installed) { + break; + } else if (status == te_enabled_yes) { + if 
(te_update_wait(tsd, accumbytes, allow, &waits[ue], + wait, te_dalloc_handlers[ue], 0)) { + to_trigger[nto_trigger++] = + te_dalloc_handlers[ue]; + } + } + } return nto_trigger; } @@ -362,7 +418,7 @@ te_init(tsd_t *tsd, bool is_alloc) { uint64_t wait = TE_MAX_START_WAIT; te_init_waits(tsd, &wait, is_alloc); - te_adjust_thresholds_helper(tsd, &ctx, wait); + te_adjust_thresholds_impl(tsd, &ctx, wait); } void diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index 7543cfda..f5408178 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -3,30 +3,160 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/prof_externs.h" #include "jemalloc/internal/prof_threshold.h" #include "jemalloc/internal/stats.h" +static malloc_mutex_t uevents_mu; -/* Table of all the thread events. - * Events share interface, but internally they will know thier - * data layout in tsd. 
+bool +experimental_thread_events_boot(void) { + return malloc_mutex_init(&uevents_mu, "thread_events", + WITNESS_RANK_THREAD_EVENTS_USER, malloc_mutex_rank_exclusive); +} + +#define TE_REGISTER_ERRCODE_FULL_SLOTS -1 +#define TE_REGISTER_ERRCODE_ALREADY_REGISTERED -2 + +static user_hook_object_t uevents_storage[TE_MAX_USER_EVENTS] = { + {NULL, 0, false}, +}; + +static atomic_p_t uevent_obj_p[TE_MAX_USER_EVENTS] = { + NULL, +}; + +static inline bool +user_object_eq(user_hook_object_t *lhs, user_hook_object_t *rhs) { + assert(lhs != NULL && rhs != NULL); + + return lhs->callback == rhs->callback && lhs->interval == rhs->interval + && lhs->is_alloc_only == rhs->is_alloc_only; +} + +/* + * Return slot number that event is registered at on success + * it will be [0, TE_MAX_USER_EVENTS) + * Return negative value on some error */ +static inline int +te_register_user_handler_locked(user_hook_object_t *new_obj) { + /* Attempt to find the free slot in global register */ + for (int i = 0; i < TE_MAX_USER_EVENTS; ++i) { + user_hook_object_t *p = (user_hook_object_t *)atomic_load_p( + &uevent_obj_p[i], ATOMIC_ACQUIRE); + + if (p && user_object_eq(p, new_obj)) { + /* Same callback and interval are registered - no error. 
*/ + return TE_REGISTER_ERRCODE_ALREADY_REGISTERED; + } else if (p == NULL) { + /* Empty slot */ + uevents_storage[i] = *new_obj; + atomic_fence(ATOMIC_SEQ_CST); + atomic_store_p(&uevent_obj_p[i], &uevents_storage[i], + ATOMIC_RELEASE); + return i; + } + } + + return TE_REGISTER_ERRCODE_FULL_SLOTS; +} + +static inline user_hook_object_t * +uobj_get(size_t cb_idx) { + assert(cb_idx < TE_MAX_USER_EVENTS); + return (user_hook_object_t *)atomic_load_p( + &uevent_obj_p[cb_idx], ATOMIC_ACQUIRE); +} + +te_enabled_t +te_user_event_enabled(size_t ue_idx, bool is_alloc) { + assert(ue_idx < TE_MAX_USER_EVENTS); + user_hook_object_t *obj = uobj_get(ue_idx); + if (!obj) { + return te_enabled_not_installed; + } + if (is_alloc || !obj->is_alloc_only) { + return te_enabled_yes; + } + return te_enabled_no; +} + +static inline uint64_t +new_event_wait(size_t cb_idx) { + user_hook_object_t *obj = uobj_get(cb_idx); + /* Enabled should have guarded it */ + assert(obj); + return obj->interval; +} + +static uint64_t +postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +static inline void +handler_wrapper(tsd_t *tsd, bool is_alloc, size_t cb_idx) { + user_hook_object_t *obj = uobj_get(cb_idx); + /* Enabled should have guarded it */ + assert(obj); + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + + pre_reentrancy(tsd, NULL); + obj->callback(is_alloc, alloc, dalloc); + post_reentrancy(tsd); +} + +#define TE_USER_HANDLER_BINDING_IDX(i) \ + static te_enabled_t te_user_alloc_enabled##i(void) { \ + return te_user_event_enabled(i, true); \ + } \ + static te_enabled_t te_user_dalloc_enabled##i(void) { \ + return te_user_event_enabled(i, false); \ + } \ + static uint64_t te_user_new_event_wait_##i(tsd_t *tsd) { \ + return new_event_wait(i); \ + } \ + static void te_user_alloc_handler_call##i(tsd_t *tsd) { \ + handler_wrapper(tsd, true, i); \ + } \ + static void te_user_dalloc_handler_call##i(tsd_t *tsd) { \ + 
handler_wrapper(tsd, false, i); \ + } \ + static te_base_cb_t user_alloc_handler##i = { \ + .enabled = &te_user_alloc_enabled##i, \ + .new_event_wait = &te_user_new_event_wait_##i, \ + .postponed_event_wait = &postponed_event_wait, \ + .event_handler = &te_user_alloc_handler_call##i}; \ + static te_base_cb_t user_dalloc_handler##i = { \ + .enabled = &te_user_dalloc_enabled##i, \ + .new_event_wait = &te_user_new_event_wait_##i, \ + .postponed_event_wait = &postponed_event_wait, \ + .event_handler = &te_user_dalloc_handler_call##i} + +TE_USER_HANDLER_BINDING_IDX(0); +TE_USER_HANDLER_BINDING_IDX(1); +TE_USER_HANDLER_BINDING_IDX(2); +TE_USER_HANDLER_BINDING_IDX(3); + +/* Table of all the thread events. */ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #ifdef JEMALLOC_PROF - &prof_sample_te_handler, + &prof_sample_te_handler, #endif - &stats_interval_te_handler, + &stats_interval_te_handler, + &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, -#endif - &tcache_gc_te_handler, -#ifdef JEMALLOC_STATS - &peak_te_handler, + &prof_threshold_te_handler, + &peak_te_handler, #endif + &user_alloc_handler0, + &user_alloc_handler1, + &user_alloc_handler2, + &user_alloc_handler3 }; te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { @@ -34,4 +164,85 @@ te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { #ifdef JEMALLOC_STATS &peak_te_handler, #endif + &user_dalloc_handler0, + &user_dalloc_handler1, + &user_dalloc_handler2, + &user_dalloc_handler3 }; + +static inline bool +te_update_tsd(tsd_t *tsd, uint64_t new_wait, size_t ue_idx, bool is_alloc) { + bool needs_recompute = false; + te_ctx_t ctx; + uint64_t next, current, cur_wait; + + if (is_alloc) { + tsd_te_datap_get_unsafe(tsd) + ->alloc_wait[te_alloc_user0 + ue_idx] = new_wait; + } else { + tsd_te_datap_get_unsafe(tsd) + ->dalloc_wait[te_dalloc_user0 + ue_idx] = new_wait; + } + te_ctx_get(tsd, &ctx, is_alloc); + + next = te_ctx_next_event_get(&ctx); + current = te_ctx_current_bytes_get(&ctx); + 
cur_wait = next - current;
+
+	if (new_wait < cur_wait) {
+		/*
+		 * Set last event to current (same as when te inits). This
+		 * will make sure that all the invariants are correct, before
+		 * we adjust next_event and next_event_fast.
+		 */
+		te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx));
+		te_adjust_thresholds_helper(tsd, &ctx, new_wait);
+		needs_recompute = true;
+	}
+	return needs_recompute;
+}
+
+static inline void
+te_recalculate_current_thread_data(tsdn_t *tsdn, int ue_idx, bool alloc_only) {
+	bool recompute = false;
+	/* we do not need lock to recalculate the events on the current thread */
+	assert(ue_idx < TE_MAX_USER_EVENTS);
+	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+	if (tsd) {
+		uint64_t new_wait = new_event_wait(ue_idx);
+		recompute = te_update_tsd(tsd, new_wait, ue_idx, true);
+		if (!alloc_only) {
+			recompute = te_update_tsd(tsd, new_wait, ue_idx, false)
+			    || recompute;
+		}
+
+		if (recompute) {
+			te_recompute_fast_threshold(tsd);
+		}
+	}
+}
+
+int
+te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj) {
+	int ret;
+	int reg_retcode;
+	if (!te_uobj || !te_uobj->callback || te_uobj->interval == 0) {
+		return EINVAL;
+	}
+
+	malloc_mutex_lock(tsdn, &uevents_mu);
+	reg_retcode = te_register_user_handler_locked(te_uobj);
+	malloc_mutex_unlock(tsdn, &uevents_mu);
+
+	if (reg_retcode >= 0) {
+		te_recalculate_current_thread_data(
+		    tsdn, reg_retcode, te_uobj->is_alloc_only);
+		ret = 0;
+	} else if (reg_retcode == TE_REGISTER_ERRCODE_ALREADY_REGISTERED) {
+		ret = 0;
+	} else {
+		ret = EINVAL;
+	}
+
+	return ret;
+}
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 68c3a705..838a4445 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -1347,6 +1347,43 @@ TEST_BEGIN(test_thread_activity_callback) {
 }
 TEST_END
 
+
+static unsigned nuser_thread_event_cb_calls;
+static void
+user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) {
+	(void)tdallocated;
+	(void)tallocated;
+	
++nuser_thread_event_cb_calls; +} +static user_hook_object_t user_te_obj = { + .callback = user_thread_event_cb, + .interval = 100, + .is_alloc_only = false, +}; + +TEST_BEGIN(test_thread_event_hook) { + const size_t big_size = 10 * 1024 * 1024; + void *ptr; + int err; + + unsigned current_calls = nuser_thread_event_cb_calls; + err = mallctl("experimental.hooks.thread_event", NULL, 0, + &user_te_obj, sizeof(user_te_obj)); + assert_d_eq(0, err, ""); + + err = mallctl("experimental.hooks.thread_event", NULL, 0, + &user_te_obj, sizeof(user_te_obj)); + assert_d_eq(0, err, "Not an error to provide object with same interval and cb"); + + + ptr = mallocx(big_size, 0); + free(ptr); + expect_u64_lt(current_calls, nuser_thread_event_cb_calls, ""); +} +TEST_END + + int main(void) { return test( @@ -1387,5 +1424,6 @@ main(void) { test_hooks_exhaustion, test_thread_idle, test_thread_peak, - test_thread_activity_callback); + test_thread_activity_callback, + test_thread_event_hook); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 8b4fb1d6..66d61cd2 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,5 +1,18 @@ #include "test/jemalloc_test.h" +static uint32_t nuser_hook_calls; +static bool is_registered = false; +static void +test_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { + ++nuser_hook_calls; +} + +static user_hook_object_t tobj = { + .callback = &test_cb, + .interval = 10, + .is_alloc_only = false +}; + TEST_BEGIN(test_next_event_fast) { tsd_t *tsd = tsd_fetch(); te_ctx_t ctx; @@ -9,6 +22,12 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); + if (!is_registered) { + is_registered = 0 == te_register_user_handler(tsd_tsdn(tsd), &tobj); + } + assert_true(is_registered || !config_stats, "Register user handler"); + nuser_hook_calls = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; for (size_t i 
= 0; i < te_alloc_count; i++) { waits[i] = TE_NEXT_EVENT_FAST_MAX; @@ -16,6 +35,7 @@ TEST_BEGIN(test_next_event_fast) { /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); + assert_true(nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); assert_ptr_not_null(p, "malloc() failed"); free(p); From c5547f9e64da41ccefa43d349b6bb79d09d5d63b Mon Sep 17 00:00:00 2001 From: "dzhao.ampere" Date: Wed, 11 Jun 2025 15:15:25 +0800 Subject: [PATCH 301/395] test/unit/psset.c: fix SIGSEGV when PAGESIZE is large When hugepage is enabled and PAGESIZE is large, the test could ask for a stack size larger than user limit. Allocating the memory instead can avoid the failure. Closes: #2408 --- test/unit/psset.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index b15d9af3..c834e531 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -146,7 +146,7 @@ TEST_BEGIN(test_fill) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -169,6 +169,8 @@ TEST_BEGIN(test_fill) { edata_init_test(&extra_alloc); err = test_psset_alloc_reuse(&psset, &extra_alloc, PAGE); expect_true(err, "Alloc succeeded even though psset should be empty"); + + free(alloc); } TEST_END @@ -180,7 +182,7 @@ TEST_BEGIN(test_reuse) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -262,6 +264,8 @@ TEST_BEGIN(test_reuse) { err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); + + free(alloc); } TEST_END @@ -273,7 +277,7 @@ TEST_BEGIN(test_evict) { 
hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -297,6 +301,8 @@ TEST_BEGIN(test_evict) { err = test_psset_alloc_reuse(&psset, &alloc[0], PAGE); expect_true(err, "psset should be empty."); + + free(alloc); } TEST_END @@ -311,7 +317,9 @@ TEST_BEGIN(test_multi_pageslab) { (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), PAGESLAB_AGE + 1); - edata_t alloc[2][HUGEPAGE_PAGES]; + edata_t* alloc[2]; + alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + alloc[1] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -361,6 +369,9 @@ TEST_BEGIN(test_multi_pageslab) { */ err = test_psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); expect_false(err, "Allocation should have succeeded"); + + free(alloc[0]); + free(alloc[1]); } TEST_END @@ -368,7 +379,7 @@ TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -422,6 +433,8 @@ TEST_BEGIN(test_stats_merged) { expect_zu_eq(1, psset.stats.merged.npageslabs, ""); expect_zu_eq(1, psset.stats.merged.nactive, ""); expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + free(alloc); } TEST_END @@ -432,7 +445,7 @@ TEST_BEGIN(test_stats_huge) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -505,6 +518,8 @@ TEST_BEGIN(test_stats_huge) { expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); } + + free(alloc); } TEST_END @@ -557,7 +572,7 @@ TEST_BEGIN(test_stats_fullness) { hpdata_t pageslab; hpdata_init(&pageslab, 
PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -587,6 +602,8 @@ TEST_BEGIN(test_stats_fullness) { stats_expect(&psset, 0); psset_update_end(&psset, &pageslab); stats_expect(&psset, 1); + + free(alloc); } TEST_END @@ -648,8 +665,8 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, TEST_BEGIN(test_oldest_fit) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; - edata_t alloc[HUGEPAGE_PAGES]; - edata_t worse_alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; @@ -666,6 +683,9 @@ TEST_BEGIN(test_oldest_fit) { expect_false(err, "Nonempty psset failed page allocation"); expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), "Allocated from the wrong pageslab"); + + free(alloc); + free(worse_alloc); } TEST_END @@ -673,8 +693,8 @@ TEST_BEGIN(test_insert_remove) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; - edata_t alloc[HUGEPAGE_PAGES]; - edata_t worse_alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; @@ -713,6 +733,9 @@ TEST_BEGIN(test_insert_remove) { psset_update_begin(&psset, &worse_pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); + + free(alloc); + free(worse_alloc); } TEST_END From 95fc091b0f4f8d4e7a2209baf2e8411a21b234a4 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 13 Jun 2025 14:15:50 -0700 Subject: [PATCH 302/395] Update appveyor settings. 
--- .appveyor.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index dedc7867..c74e89db 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,36 +5,42 @@ environment: - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Syuu + - pacman --noconfirm -S autoconf build_script: - bash -c "autoconf" From e350c715719efc7c13195c842e44c1f5ac8e28a7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Jun 2025 15:46:51 -0700 Subject: [PATCH 303/395] Remove --enable-limit-usize-gap for cirrus CI since the config-time option is removed. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 8051272c..585aa42f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,7 +40,7 @@ task: # We don't perfectly track freebsd stdlib.h definitions. This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ --enable-limit-usize-gap ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} + - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests From a952a3b8b08a63609172c8c84cf6eb09de9fc7be Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 4 Jun 2025 13:28:37 -0700 Subject: [PATCH 304/395] Update the default value for opt_experimental_tcache_gc and opt_calloc_madvise_threshold --- include/jemalloc/internal/jemalloc_internal_types.h | 2 ++ src/jemalloc.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 6a81f3cd..cddbfb65 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -145,4 +145,6 @@ typedef enum malloc_init_e malloc_init_t; assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ VARIABLE_ARRAY_UNSAFE(type, name, count) +#define CALLOC_MADVISE_THRESHOLD_DEFAULT (((size_t)1) << 23) /* 8 MB */ + #endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index a4509e68..c6621a79 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -158,7 +158,7 @@ void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; -bool opt_experimental_tcache_gc = false; +bool opt_experimental_tcache_gc = true; bool opt_zero = false; unsigned opt_narenas = 0; 
static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -168,7 +168,8 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; -size_t opt_calloc_madvise_threshold = 0; +size_t opt_calloc_madvise_threshold = + CALLOC_MADVISE_THRESHOLD_DEFAULT; /* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; From 6200e8987feb5eae198b95b14cd89d09695f7b3c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 13 Jun 2025 12:31:12 -0700 Subject: [PATCH 305/395] Reformat the codebase with the clang-format 18. --- include/jemalloc/internal/activity_callback.h | 9 +- include/jemalloc/internal/arena_externs.h | 136 +- include/jemalloc/internal/arena_inlines_b.h | 252 +- include/jemalloc/internal/arena_stats.h | 56 +- include/jemalloc/internal/arena_structs.h | 35 +- include/jemalloc/internal/arena_types.h | 26 +- include/jemalloc/internal/assert.h | 74 +- include/jemalloc/internal/atomic.h | 53 +- include/jemalloc/internal/atomic_c11.h | 54 +- include/jemalloc/internal/atomic_gcc_atomic.h | 165 +- include/jemalloc/internal/atomic_gcc_sync.h | 92 +- include/jemalloc/internal/atomic_msvc.h | 151 +- .../internal/background_thread_externs.h | 22 +- .../internal/background_thread_inlines.h | 8 +- .../internal/background_thread_structs.h | 26 +- include/jemalloc/internal/base.h | 44 +- include/jemalloc/internal/batcher.h | 14 +- include/jemalloc/internal/bin.h | 24 +- include/jemalloc/internal/bin_info.h | 12 +- include/jemalloc/internal/bin_stats.h | 30 +- include/jemalloc/internal/bin_types.h | 5 +- include/jemalloc/internal/bit_util.h | 52 +- include/jemalloc/internal/bitmap.h | 217 +- include/jemalloc/internal/buf_writer.h | 24 +- include/jemalloc/internal/cache_bin.h | 117 +- include/jemalloc/internal/ckh.h | 10 +- include/jemalloc/internal/counter.h | 2 +- include/jemalloc/internal/ctl.h | 135 +- include/jemalloc/internal/decay.h | 14 +- include/jemalloc/internal/ecache.h | 16 +- include/jemalloc/internal/edata.h | 281 +- 
include/jemalloc/internal/edata_cache.h | 16 +- include/jemalloc/internal/ehooks.h | 39 +- include/jemalloc/internal/emap.h | 99 +- include/jemalloc/internal/emitter.h | 106 +- include/jemalloc/internal/exp_grow.h | 4 +- include/jemalloc/internal/extent.h | 61 +- include/jemalloc/internal/extent_dss.h | 18 +- include/jemalloc/internal/extent_mmap.h | 4 +- include/jemalloc/internal/fb.h | 29 +- include/jemalloc/internal/fxp.h | 2 +- include/jemalloc/internal/hash.h | 316 +- include/jemalloc/internal/hook.h | 20 +- include/jemalloc/internal/hpa.h | 15 +- include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/hpa_utils.h | 77 +- include/jemalloc/internal/hpdata.h | 21 +- include/jemalloc/internal/inspect.h | 10 +- .../internal/jemalloc_internal_decls.h | 125 +- .../internal/jemalloc_internal_externs.h | 74 +- .../internal/jemalloc_internal_inlines_a.h | 8 +- .../internal/jemalloc_internal_inlines_b.h | 23 +- .../internal/jemalloc_internal_inlines_c.h | 357 +-- .../internal/jemalloc_internal_macros.h | 171 +- .../internal/jemalloc_internal_overrides.h | 9 +- .../internal/jemalloc_internal_types.h | 128 +- include/jemalloc/internal/large_externs.h | 18 +- include/jemalloc/internal/lockedint.h | 63 +- include/jemalloc/internal/log.h | 66 +- include/jemalloc/internal/malloc_io.h | 98 +- include/jemalloc/internal/mutex.h | 188 +- include/jemalloc/internal/mutex_prof.h | 112 +- include/jemalloc/internal/nstime.h | 39 +- include/jemalloc/internal/pa.h | 28 +- include/jemalloc/internal/pac.h | 20 +- include/jemalloc/internal/pages.h | 70 +- include/jemalloc/internal/pai.h | 13 +- include/jemalloc/internal/peak.h | 3 +- include/jemalloc/internal/peak_event.h | 2 +- include/jemalloc/internal/ph.h | 214 +- include/jemalloc/internal/prng.h | 10 +- include/jemalloc/internal/prof_data.h | 16 +- include/jemalloc/internal/prof_externs.h | 95 +- include/jemalloc/internal/prof_hook.h | 6 +- include/jemalloc/internal/prof_inlines.h | 19 +- 
include/jemalloc/internal/prof_log.h | 6 +- include/jemalloc/internal/prof_structs.h | 106 +- include/jemalloc/internal/prof_sys.h | 14 +- include/jemalloc/internal/prof_types.h | 48 +- include/jemalloc/internal/psset.h | 2 +- include/jemalloc/internal/ql.h | 171 +- include/jemalloc/internal/qr.h | 66 +- include/jemalloc/internal/quantum.h | 148 +- include/jemalloc/internal/rb.h | 2 +- include/jemalloc/internal/rtree.h | 263 +- include/jemalloc/internal/rtree_tsd.h | 19 +- include/jemalloc/internal/safety_check.h | 27 +- include/jemalloc/internal/san.h | 48 +- include/jemalloc/internal/san_bump.h | 9 +- include/jemalloc/internal/sc.h | 54 +- include/jemalloc/internal/sec.h | 14 +- include/jemalloc/internal/sec_opts.h | 22 +- include/jemalloc/internal/smoothstep.h | 410 +-- include/jemalloc/internal/spin.h | 9 +- include/jemalloc/internal/stats.h | 30 +- include/jemalloc/internal/sz.h | 72 +- include/jemalloc/internal/tcache_externs.h | 52 +- include/jemalloc/internal/tcache_inlines.h | 56 +- include/jemalloc/internal/tcache_structs.h | 30 +- include/jemalloc/internal/tcache_types.h | 20 +- include/jemalloc/internal/test_hooks.h | 24 +- include/jemalloc/internal/thread_event.h | 31 +- .../jemalloc/internal/thread_event_registry.h | 4 +- include/jemalloc/internal/ticker.h | 20 +- include/jemalloc/internal/tsd.h | 66 +- include/jemalloc/internal/tsd_generic.h | 47 +- include/jemalloc/internal/tsd_internals.h | 201 +- .../internal/tsd_malloc_thread_cleanup.h | 2 +- include/jemalloc/internal/tsd_tls.h | 4 +- include/jemalloc/internal/tsd_types.h | 4 +- include/jemalloc/internal/tsd_win.h | 49 +- include/jemalloc/internal/typed_list.h | 93 +- include/jemalloc/internal/util.h | 55 +- include/jemalloc/internal/witness.h | 86 +- include/msvc_compat/C99/stdint.h | 302 +- include/msvc_compat/strings.h | 39 +- msvc/test_threads/test_threads.cpp | 172 +- msvc/test_threads/test_threads_main.cpp | 7 +- src/arena.c | 504 ++- src/background_thread.c | 297 +- src/base.c | 209 +- 
src/batcher.c | 10 +- src/bin.c | 10 +- src/bin_info.c | 8 +- src/bitmap.c | 21 +- src/buf_writer.c | 15 +- src/cache_bin.c | 30 +- src/ckh.c | 101 +- src/counter.c | 2 +- src/ctl.c | 2262 +++++++------- src/decay.c | 71 +- src/ecache.c | 2 +- src/edata.c | 5 +- src/edata_cache.c | 15 +- src/ehooks.c | 70 +- src/emap.c | 105 +- src/eset.c | 161 +- src/extent.c | 416 +-- src/extent_dss.c | 89 +- src/extent_mmap.c | 6 +- src/fxp.c | 14 +- src/hook.c | 87 +- src/hpa.c | 227 +- src/hpa_hooks.c | 33 +- src/hpdata.c | 62 +- src/inspect.c | 6 +- src/jemalloc.c | 1499 ++++----- src/jemalloc_cpp.cpp | 109 +- src/large.c | 84 +- src/log.c | 14 +- src/malloc_io.c | 451 +-- src/mutex.c | 52 +- src/nstime.c | 42 +- src/pa.c | 32 +- src/pa_extra.c | 20 +- src/pac.c | 186 +- src/pages.c | 227 +- src/pai.c | 6 +- src/peak_event.c | 12 +- src/prof.c | 140 +- src/prof_data.c | 297 +- src/prof_log.c | 130 +- src/prof_recent.c | 79 +- src/prof_stack_range.c | 209 +- src/prof_stats.c | 4 +- src/prof_sys.c | 223 +- src/prof_threshold.c | 16 +- src/psset.c | 67 +- src/rtree.c | 125 +- src/safety_check.c | 23 +- src/san.c | 31 +- src/san_bump.c | 35 +- src/sc.c | 10 +- src/sec.c | 76 +- src/stats.c | 1045 +++---- src/sz.c | 14 +- src/tcache.c | 456 +-- src/thread_event.c | 192 +- src/thread_event_registry.c | 29 +- src/ticker.c | 15 +- src/tsd.c | 123 +- src/util.c | 5 +- src/witness.c | 14 +- src/zone.c | 113 +- test/analyze/prof_bias.c | 8 +- test/analyze/rand.c | 64 +- test/analyze/sizes.c | 9 +- test/include/test/SFMT-alti.h | 186 +- test/include/test/SFMT-params.h | 40 +- test/include/test/SFMT-params11213.h | 88 +- test/include/test/SFMT-params1279.h | 88 +- test/include/test/SFMT-params132049.h | 88 +- test/include/test/SFMT-params19937.h | 88 +- test/include/test/SFMT-params216091.h | 88 +- test/include/test/SFMT-params2281.h | 88 +- test/include/test/SFMT-params4253.h | 88 +- test/include/test/SFMT-params44497.h | 88 +- test/include/test/SFMT-params607.h | 88 +- 
test/include/test/SFMT-params86243.h | 88 +- test/include/test/SFMT-sse2.h | 150 +- test/include/test/SFMT.h | 84 +- test/include/test/arena_util.h | 41 +- test/include/test/bench.h | 32 +- test/include/test/bgthd.h | 4 +- test/include/test/btalloc.h | 52 +- test/include/test/extent_hooks.h | 191 +- test/include/test/fork.h | 8 +- test/include/test/math.h | 194 +- test/include/test/mq.h | 148 +- test/include/test/mtx.h | 14 +- test/include/test/nbits.h | 208 +- test/include/test/san.h | 9 +- test/include/test/test.h | 956 +++--- test/include/test/timer.h | 8 +- test/integration/MALLOCX_ARENA.c | 24 +- test/integration/aligned_alloc.c | 47 +- test/integration/allocated.c | 36 +- test/integration/cpp/basic.cpp | 3 +- test/integration/cpp/infallible_new_false.cpp | 4 +- test/integration/cpp/infallible_new_true.cpp | 12 +- test/integration/extent.c | 108 +- test/integration/malloc.c | 3 +- test/integration/mallocx.c | 117 +- test/integration/overflow.c | 19 +- test/integration/posix_memalign.c | 54 +- test/integration/rallocx.c | 132 +- test/integration/sdallocx.c | 25 +- test/integration/slab_sizes.c | 24 +- test/integration/smallocx.c | 130 +- test/integration/thread_arena.c | 29 +- test/integration/thread_tcache_enabled.c | 57 +- test/integration/xallocx.c | 111 +- test/src/SFMT.c | 739 ++--- test/src/mtx.c | 6 +- test/src/sleep.c | 4 +- test/src/test.c | 49 +- test/src/thd.c | 5 +- test/src/timer.c | 22 +- test/stress/batch_alloc.c | 35 +- test/stress/cpp/microbench.cpp | 42 +- test/stress/fill_flush.c | 18 +- test/stress/hookbench.c | 27 +- test/stress/large_microbench.c | 6 +- test/stress/mallctl.c | 35 +- test/stress/microbench.c | 19 +- test/unit/SFMT.c | 2779 ++++++++--------- test/unit/a0.c | 3 +- test/unit/arena_decay.c | 129 +- test/unit/arena_reset.c | 115 +- test/unit/atomic.c | 11 +- test/unit/background_thread.c | 37 +- test/unit/background_thread_enable.c | 50 +- test/unit/base.c | 125 +- test/unit/batch_alloc.c | 54 +- test/unit/batcher.c | 75 +- 
test/unit/bin_batching.c | 44 +- test/unit/binshard.c | 40 +- test/unit/bit_util.c | 168 +- test/unit/bitmap.c | 130 +- test/unit/buf_writer.c | 72 +- test/unit/cache_bin.c | 138 +- test/unit/ckh.c | 87 +- test/unit/counter.c | 13 +- test/unit/decay.c | 76 +- test/unit/div.c | 11 +- test/unit/double_free.c | 22 +- test/unit/edata_cache.c | 20 +- test/unit/emitter.c | 535 ++-- test/unit/extent_quantize.c | 76 +- test/unit/fb.c | 220 +- test/unit/fork.c | 10 +- test/unit/fxp.c | 138 +- test/unit/hash.c | 101 +- test/unit/hook.c | 177 +- test/unit/hpa.c | 211 +- test/unit/hpa_background_thread.c | 52 +- test/unit/hpa_vectorized_madvise.c | 73 +- .../unit/hpa_vectorized_madvise_large_batch.c | 48 +- test/unit/hpdata.c | 95 +- test/unit/huge.c | 64 +- test/unit/inspect.c | 120 +- test/unit/junk.c | 107 +- test/unit/log.c | 58 +- test/unit/mallctl.c | 780 ++--- test/unit/malloc_conf_2.c | 26 +- test/unit/malloc_io.c | 171 +- test/unit/math.c | 512 ++- test/unit/mpsc_queue.c | 54 +- test/unit/mq.c | 31 +- test/unit/mtx.c | 22 +- test/unit/ncached_max.c | 116 +- test/unit/nstime.c | 106 +- test/unit/oversize_threshold.c | 21 +- test/unit/pa.c | 38 +- test/unit/pack.c | 64 +- test/unit/pages.c | 18 +- test/unit/peak.c | 10 +- test/unit/ph.c | 109 +- test/unit/prng.c | 80 +- test/unit/prof_accum.c | 31 +- test/unit/prof_active.c | 46 +- test/unit/prof_gdump.c | 21 +- test/unit/prof_hook.c | 125 +- test/unit/prof_idump.c | 20 +- test/unit/prof_log.c | 45 +- test/unit/prof_mdump.c | 23 +- test/unit/prof_recent.c | 331 +- test/unit/prof_reset.c | 77 +- test/unit/prof_small.c | 9 +- test/unit/prof_stats.c | 84 +- test/unit/prof_sys_thread_name.c | 21 +- test/unit/prof_tctx.c | 19 +- test/unit/prof_thread_name.c | 54 +- test/unit/prof_threshold.c | 33 +- test/unit/psset.c | 129 +- test/unit/ql.c | 101 +- test/unit/qr.c | 73 +- test/unit/rb.c | 285 +- test/unit/retained.c | 72 +- test/unit/rtree.c | 158 +- test/unit/safety_check.c | 32 +- test/unit/san.c | 35 +- 
test/unit/san_bump.c | 38 +- test/unit/sc.c | 11 +- test/unit/sec.c | 116 +- test/unit/seq.c | 17 +- test/unit/size_check.c | 6 +- test/unit/size_classes.c | 112 +- test/unit/slab.c | 24 +- test/unit/smoothstep.c | 27 +- test/unit/spin.c | 3 +- test/unit/stats.c | 254 +- test/unit/stats_print.c | 717 +++-- test/unit/sz.c | 31 +- test/unit/tcache_max.c | 24 +- test/unit/test_hooks.c | 4 +- test/unit/thread_event.c | 18 +- test/unit/ticker.c | 55 +- test/unit/tsd.c | 28 +- test/unit/uaf.c | 45 +- test/unit/witness.c | 47 +- test/unit/zero.c | 26 +- test/unit/zero_realloc_abort.c | 7 +- test/unit/zero_realloc_alloc.c | 17 +- test/unit/zero_realloc_free.c | 10 +- test/unit/zero_reallocs.c | 8 +- 346 files changed, 18286 insertions(+), 17770 deletions(-) diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h index 0f4f3962..6745f1a2 100644 --- a/include/jemalloc/internal/activity_callback.h +++ b/include/jemalloc/internal/activity_callback.h @@ -13,13 +13,14 @@ * * The calls to this thunk get driven by the peak_event module. 
*/ -#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL} -typedef void (*activity_callback_t)(void *uctx, uint64_t allocated, - uint64_t deallocated); +#define ACTIVITY_CALLBACK_THUNK_INITIALIZER \ + { NULL, NULL } +typedef void (*activity_callback_t)( + void *uctx, uint64_t allocated, uint64_t deallocated); typedef struct activity_callback_thunk_s activity_callback_thunk_t; struct activity_callback_thunk_s { activity_callback_t callback; - void *uctx; + void *uctx; }; #endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 91fed258..39d2099d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -21,7 +21,7 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; -extern const char *const percpu_arena_mode_names[]; +extern const char *const percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; @@ -30,7 +30,7 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; -extern bool opt_huge_arena_pac_thp; +extern bool opt_huge_arena_pac_thp; extern pac_thp_t huge_arena_pac_thp; /* @@ -39,90 +39,90 @@ extern pac_thp_t huge_arena_pac_thp; */ extern uint32_t arena_bin_offsets[SC_NBINS]; -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, - ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, + size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t 
*astats, - bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, + hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); -edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool zero); -void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, - edata_t *edata); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldusize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldusize); -bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, - ssize_t decay_ms); +edata_t *arena_extent_alloc_large( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); +void arena_extent_dalloc_large_prep( + tsdn_t *tsdn, arena_t *arena, edata_t *edata); +void arena_extent_ralloc_large_shrink( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize); +void arena_extent_ralloc_large_expand( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize); +bool arena_decay_ms_set( + tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms); ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); -void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all); +void arena_decay( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); -void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max); +void 
arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max); -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero, bool slab); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, bool slab, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, - size_t bumped_usize); -void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path); +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, bool slab); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache); +void arena_prof_promote( + tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize); +void arena_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin); -void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin); -void arena_dalloc_small(tsdn_t *tsdn, void *ptr); -bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero, size_t *newsize); +void arena_dalloc_bin_locked_handle_newly_empty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +void arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero, size_t 
*newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args); -dss_prec_t arena_dss_prec_get(arena_t *arena); -ehooks_t *arena_get_ehooks(arena_t *arena); -extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -void arena_name_get(arena_t *arena, char *name); -void arena_name_set(arena_t *arena, const char *name); +dss_prec_t arena_dss_prec_get(arena_t *arena); +ehooks_t *arena_get_ehooks(arena_t *arena); +extent_hooks_t *arena_set_extent_hooks( + tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_name_get(arena_t *arena, char *name); +void arena_name_set(arena_t *arena, const char *name); ssize_t arena_dirty_decay_ms_default_get(void); -bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); +bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); -bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); -bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, - size_t *old_limit, size_t *new_limit); +bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set( + tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); +bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose(tsdn_t 
*tsdn, arena_t *arena, szind_t binind, - unsigned *binshard); +bin_t *arena_bin_choose( + tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); -bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_prefork4(tsdn_t *tsdn, arena_t *arena); -void arena_prefork5(tsdn_t *tsdn, arena_t *arena); -void arena_prefork6(tsdn_t *tsdn, arena_t *arena); -void arena_prefork7(tsdn_t *tsdn, arena_t *arena); -void arena_prefork8(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_prefork7(tsdn_t *tsdn, arena_t *arena); +void arena_prefork8(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 61008b59..549dfb8a 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -21,8 +21,8 @@ static inline arena_t * arena_get_from_edata(edata_t *edata) { - return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)], 
- ATOMIC_RELAXED); + return (arena_t *)atomic_load_p( + &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -61,15 +61,17 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { * The cost is low enough (as edata will be accessed anyway) to be * enabled all the time. */ - if (unlikely(edata == NULL || - edata_state_get(edata) != extent_state_active)) { - safety_check_fail("Invalid deallocation detected: " + if (unlikely(edata == NULL + || edata_state_get(edata) != extent_state_active)) { + safety_check_fail( + "Invalid deallocation detected: " "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", ptr); + "possibly caused by double free bugs.", + ptr); return true; } - if (unlikely(input_size != edata_usize_get(edata) || - input_size > SC_LARGE_MAXCLASS)) { + if (unlikely(input_size != edata_usize_get(edata) + || input_size > SC_LARGE_MAXCLASS)) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); return true; @@ -86,25 +88,26 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, assert(prof_info != NULL); edata_t *edata = NULL; - bool is_slab; + bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); } if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); - size_t usize = (alloc_ctx == NULL)? 
edata_usize_get(edata): - emap_alloc_ctx_usize_get(alloc_ctx); - if (reset_recent && - large_dalloc_safety_checks(edata, ptr, usize)) { + size_t usize = (alloc_ctx == NULL) + ? edata_usize_get(edata) + : emap_alloc_ctx_usize_get(alloc_ctx); + if (reset_recent + && large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -119,22 +122,22 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +arena_prof_tctx_reset( + tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. */ if (alloc_ctx == NULL) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); large_prof_tctx_reset(edata); } } @@ -145,16 +148,16 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, - size_t size) { +arena_prof_info_set( + tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(!edata_slab_get(edata)); @@ -177,9 +180,9 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { * use a single ticker for all of them. 
*/ ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); - uint64_t *prng_state = tsd_prng_statep_get(tsd); + uint64_t *prng_state = tsd_prng_statep_get(tsd); if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, - tsd_reentrancy_level_get(tsd) > 0))) { + tsd_reentrancy_level_get(tsd) > 0))) { arena_decay(tsdn, arena, false, false); } } @@ -197,14 +200,13 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, if (likely(tcache != NULL)) { if (likely(slab)) { assert(sz_can_use_slab(size)); - return tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path); - } else if (likely( - ind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(ind, &tcache->bins[ind], - tcache->tcache_slow))) { - return tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path); + return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + } else if (likely(ind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow))) { + return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); } /* (size > tcache_max) case falls through. 
*/ } @@ -241,8 +243,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup( + tsdn, &arena_emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -261,8 +263,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, - size_t usize) { +arena_dalloc_large_no_tcache( + tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { /* * szind is still needed in this function mainly becuase * szind < SC_NBINS determines not only if this is a small alloc, @@ -272,8 +274,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. 
*/ return; @@ -290,13 +292,13 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -311,19 +313,19 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, size_t usize, bool slow_path) { - assert (!tsdn_null(tsdn) && tcache != NULL); + assert(!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { - if (szind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(szind, &tcache->bins[szind], - tcache->tcache_slow)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + if (szind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled( + szind, &tcache->bins[szind], tcache->tcache_slow)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - edata_t *edata = emap_edata_lookup(tsdn, - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; @@ -335,16 +337,17 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, /* Find the region index of a pointer. 
*/ JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind_impl(div_info_t* div_info, szind_t binind, - edata_t *slab, const void *ptr) { +arena_slab_regind_impl( + div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); @@ -360,22 +363,23 @@ arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { if (!config_debug) { return false; } - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - szind_t binind = edata_szind_get(edata); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); div_info_t div_info = arena_binind_div_info[binind]; /* * Calls the internal function arena_slab_regind_impl because the * safety check does not require a lock. 
*/ size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); - slab_data_t *slab_data = edata_slab_data_get(edata); + slab_data_t *slab_data = edata_slab_data_get(edata); const bin_info_t *bin_info = &bin_infos[binind]; assert(edata_nfree_get(edata) < bin_info->nregs); - if (unlikely(!bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, - regind))) { + if (unlikely(!bitmap_get( + slab_data->bitmap, &bin_info->bitmap_info, regind))) { safety_check_fail( "Invalid deallocation detected: the pointer being freed (%p) not " - "currently active, possibly caused by double free bugs.\n", ptr); + "currently active, possibly caused by double free bugs.\n", + ptr); return true; } return false; @@ -397,18 +401,18 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx = *caller_alloc_ctx; } else { util_assume(tsdn != NULL); - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -416,8 +420,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { return; } - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - alloc_ctx.szind, slow_path); + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); @@ -436,21 +440,21 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * object, so 
base szind and slab on the given size. */ szind_t szind = sz_size2index(size); - emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), - size); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), size); } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -481,8 +485,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. 
*/ - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { @@ -498,14 +502,14 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); - emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, - sz_s2u(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + emap_alloc_ctx_init( + &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -513,8 +517,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { return; } - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - alloc_ctx.szind, slow_path); + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, sz_s2u(size), slow_path); @@ -522,13 +526,13 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } static inline void -arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t alignment) { +arena_cache_oblivious_randomize( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { assert(edata_base_get(edata) == edata_addr_get(edata)); if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); + unsigned lg_range = LG_PAGE + - lg_floor(CACHELINE_CEILING(alignment)); size_t r; if (!tsdn_null(tsdn)) { tsd_t *tsd = tsdn_tsd(tsdn); @@ -538,12 
+542,12 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, uint64_t stack_value = (uint64_t)(uintptr_t)&r; r = (size_t)prng_lg_range_u64(&stack_value, lg_range); } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - edata->e_addr = (void *)((byte_t *)edata->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == - edata->e_addr); + uintptr_t random_offset = ((uintptr_t)r) + << (LG_PAGE - lg_range); + edata->e_addr = (void *)((byte_t *)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) + == edata->e_addr); } } @@ -556,20 +560,21 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; struct arena_dalloc_bin_locked_info_s { div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; + uint32_t nregs; + uint64_t ndalloc; }; JEMALLOC_ALWAYS_INLINE size_t arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, const void *ptr) { - size_t regind = arena_slab_regind_impl(&info->div_info, binind, slab, ptr); + size_t regind = arena_slab_regind_impl( + &info->div_info, binind, slab, ptr); return regind; } JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, - szind_t binind) { +arena_dalloc_bin_locked_begin( + arena_dalloc_bin_locked_info_t *info, szind_t binind) { info->div_info = arena_binind_div_info[binind]; info->nregs = bin_infos[binind].nregs; info->ndalloc = 0; @@ -589,8 +594,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); + size_t regind = 
arena_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); assert(edata_nfree_get(slab) < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. */ @@ -605,8 +610,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { - arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, - bin); + arena_dalloc_bin_locked_handle_newly_empty( + tsdn, arena, slab, bin); if (*dalloc_slabs_count < ndalloc_slabs) { dalloc_slabs[*dalloc_slabs_count] = slab; @@ -615,8 +620,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, edata_list_active_append(dalloc_slabs_extra, slab); } } else if (nfree == 1 && slab != bin->slabcur) { - arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, - bin); + arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn, arena, slab, bin); } } @@ -637,21 +642,20 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, edata_list_active_t *dalloc_slabs_extra) { assert(binind < bin_info_nbatched_sizes); bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - size_t nelems_to_pop = batcher_pop_begin(tsdn, - &batched_bin->remote_frees); + size_t nelems_to_pop = batcher_pop_begin( + tsdn, &batched_bin->remote_frees); bin_batching_test_mid_pop(nelems_to_pop); if (nelems_to_pop == BATCHER_NO_IDX) { - malloc_mutex_assert_not_owner(tsdn, - &batched_bin->remote_frees.mtx); + malloc_mutex_assert_not_owner( + tsdn, &batched_bin->remote_frees.mtx); return; } else { - malloc_mutex_assert_owner(tsdn, - &batched_bin->remote_frees.mtx); + malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx); } - size_t npushes = batcher_pop_get_pushes(tsdn, - &batched_bin->remote_frees); + size_t npushes = batcher_pop_get_pushes( + tsdn, &batched_bin->remote_frees); bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; for (size_t i = 0; i < nelems_to_pop; 
i++) { remote_free_data[i] = batched_bin->remote_free_data[i]; @@ -682,8 +686,8 @@ struct arena_bin_flush_batch_state_s { * backup array for any "extra" slabs, as well as a a list to allow a * dynamic number of ones exceeding that array. */ - edata_t *dalloc_slabs[8]; - unsigned dalloc_slab_count; + edata_t *dalloc_slabs[8]; + unsigned dalloc_slab_count; edata_list_active_t dalloc_slabs_extra; }; @@ -712,8 +716,8 @@ arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, preallocated_slabs); arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, - state->dalloc_slabs, ndalloc_slabs, - &state->dalloc_slab_count, &state->dalloc_slabs_extra); + state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count, + &state->dalloc_slabs_extra); } JEMALLOC_ALWAYS_INLINE void @@ -769,8 +773,8 @@ arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { ret = shard0 + binshard; } assert(binind >= SC_NBINS - 1 - || (uintptr_t)ret < (uintptr_t)arena - + arena_bin_offsets[binind + 1]); + || (uintptr_t)ret + < (uintptr_t)arena + arena_bin_offsets[binind + 1]); return ret; } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 7f075114..01012f68 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -17,31 +17,31 @@ struct arena_stats_large_s { * Total number of large allocation/deallocation requests served directly * by the arena. */ - locked_u64_t nmalloc; - locked_u64_t ndalloc; + locked_u64_t nmalloc; + locked_u64_t ndalloc; /* * Total large active bytes (allocated - deallocated) served directly * by the arena. */ - locked_u64_t active_bytes; + locked_u64_t active_bytes; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - locked_u64_t nrequests; /* Partially derived. */ + locked_u64_t nrequests; /* Partially derived. 
*/ /* * Number of tcache fills / flushes for large (similarly, periodically * merged). Note that there is no large tcache batch-fill currently * (i.e. only fill 1 at a time); however flush may be batched. */ - locked_u64_t nfills; /* Partially derived. */ - locked_u64_t nflushes; /* Partially derived. */ + locked_u64_t nfills; /* Partially derived. */ + locked_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ + size_t curlextents; /* Derived. */ }; /* @@ -57,40 +57,40 @@ struct arena_stats_s { * resident includes the base stats -- that's why it lives here and not * in pa_shard_stats_t. */ - size_t base; /* Derived. */ - size_t metadata_edata; /* Derived. */ - size_t metadata_rtree; /* Derived. */ - size_t resident; /* Derived. */ - size_t metadata_thp; /* Derived. */ - size_t mapped; /* Derived. */ + size_t base; /* Derived. */ + size_t metadata_edata; /* Derived. */ + size_t metadata_rtree; /* Derived. */ + size_t resident; /* Derived. */ + size_t metadata_thp; /* Derived. */ + size_t mapped; /* Derived. */ - atomic_zu_t internal; + atomic_zu_t internal; - size_t allocated_large; /* Derived. */ - uint64_t nmalloc_large; /* Derived. */ - uint64_t ndalloc_large; /* Derived. */ - uint64_t nfills_large; /* Derived. */ - uint64_t nflushes_large; /* Derived. */ - uint64_t nrequests_large; /* Derived. */ + size_t allocated_large; /* Derived. */ + uint64_t nmalloc_large; /* Derived. */ + uint64_t ndalloc_large; /* Derived. */ + uint64_t nfills_large; /* Derived. */ + uint64_t nflushes_large; /* Derived. */ + uint64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. This * lives here only because it's convenient for the purposes of the ctl * module -- it only knows about the single arena_stats. */ - pa_shard_stats_t pa_shard_stats; + pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. 
*/ - size_t tcache_bytes; /* Derived. */ - size_t tcache_stashed_bytes; /* Derived. */ + size_t tcache_bytes; /* Derived. */ + size_t tcache_stashed_bytes; /* Derived. */ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; /* One element for each large size class. */ - arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; /* Arena uptime. */ - nstime_t uptime; + nstime_t uptime; }; static inline bool @@ -101,7 +101,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { } } if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } /* Memory is zeroed, so there is no need to clear stats. */ @@ -115,8 +115,8 @@ arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), &lstats->nrequests, nrequests); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), - &lstats->nflushes, 1); + locked_inc_u64( + tsdn, LOCKEDINT_MTX(arena_stats->mtx), &lstats->nflushes, 1); LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 56e12f95..4778ca1b 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -32,20 +32,20 @@ struct arena_s { * * Synchronization: atomic. */ - atomic_u_t nthreads[2]; + atomic_u_t nthreads[2]; /* Next bin shard for binding new threads. Synchronization: atomic. */ - atomic_u_t binshard_next; + atomic_u_t binshard_next; /* * When percpu_arena is enabled, to amortize the cost of reading / * updating the current CPU id, track the most recent thread accessing * this arena, and only read CPU if there is a mismatch. 
*/ - tsdn_t *last_thd; + tsdn_t *last_thd; /* Synchronization: internal. */ - arena_stats_t stats; + arena_stats_t stats; /* * Lists of tcaches and cache_bin_array_descriptors for extant threads @@ -54,28 +54,28 @@ struct arena_s { * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_slow_t) tcache_ql; - ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_slow_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* * Represents a dss_prec_t, but atomically. * * Synchronization: atomic. */ - atomic_u_t dss_prec; + atomic_u_t dss_prec; /* * Extant large allocations. * * Synchronization: large_mtx. */ - edata_list_active_t large; + edata_list_active_t large; /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; + malloc_mutex_t large_mtx; /* The page-level allocator shard this arena uses. */ - pa_shard_t pa_shard; + pa_shard_t pa_shard; /* * A cached copy of base->ind. This can get accessed on hot paths; @@ -88,12 +88,12 @@ struct arena_s { * * Synchronization: internal. */ - base_t *base; + base_t *base; /* Used to determine uptime. Read-only after initialization. */ - nstime_t create_time; + nstime_t create_time; /* The name of the arena. */ - char name[ARENA_NAME_LEN]; + char name[ARENA_NAME_LEN]; /* * The arena is allocated alongside its bins; really this is a @@ -101,10 +101,11 @@ struct arena_s { * Enforcing cacheline-alignment to minimize the number of cachelines * touched on the hot paths. */ - JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " - "Use `arena_get_bin` instead.") + JEMALLOC_WARN_ON_USAGE( + "Do not use this field directly. 
" + "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_with_batch_t all_bins[0]; + bin_with_batch_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index a1fc8926..7ed2b968 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -5,38 +5,38 @@ #include "jemalloc/internal/sc.h" /* Default decay times in milliseconds. */ -#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT (0) +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ -#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 /* Maximum length of the arena name. */ #define ARENA_NAME_LEN 32 typedef struct arena_decay_s arena_decay_t; -typedef struct arena_s arena_t; +typedef struct arena_s arena_t; typedef enum { - percpu_arena_mode_names_base = 0, /* Used for options processing. */ + percpu_arena_mode_names_base = 0, /* Used for options processing. */ /* * *_uninit are used only during bootstrapping, and must correspond * to initialized variant plus percpu_arena_mode_enabled_base. */ - percpu_arena_uninit = 0, - per_phycpu_arena_uninit = 1, + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, /* All non-disabled modes must come after percpu_arena_disabled. */ - percpu_arena_disabled = 2, + percpu_arena_disabled = 2, - percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ percpu_arena_mode_enabled_base = 3, - percpu_arena = 3, - per_phycpu_arena = 4 /* Hyper threads share arena. */ + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. 
*/ } percpu_arena_mode_t; -#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) -#define PERCPU_ARENA_DEFAULT percpu_arena_disabled +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled /* * When allocation_size >= oversize_threshold, use the dedicated huge arena diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index 38eb2a2c..1b5da72f 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -7,51 +7,57 @@ * assertion failure. */ #ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) +# define assert(e) \ + do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ + } while (0) #endif #ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) +# define not_reached() \ + do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ + } while (0) #endif #ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) +# define not_implemented() \ + do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) #endif #ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) { \ - not_implemented(); \ - } \ -} while (0) +# define assert_not_implemented(e) \ + do { \ + if 
(unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ + } while (0) #endif /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #ifndef cassert -#define cassert(c) do { \ - if (unlikely(!(c))) { \ - not_reached(); \ - } \ -} while (0) +# define cassert(c) \ + do { \ + if (unlikely(!(c))) { \ + not_reached(); \ + } \ + } while (0) #endif diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 6dd2a7c6..ddd9341e 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -5,21 +5,21 @@ #define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) -# include "jemalloc/internal/atomic_gcc_atomic.h" -# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) -# undef JEMALLOC_U8_ATOMICS -# endif +# include "jemalloc/internal/atomic_gcc_atomic.h" +# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) -# include "jemalloc/internal/atomic_gcc_sync.h" -# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) -# undef JEMALLOC_U8_ATOMICS -# endif +# include "jemalloc/internal/atomic_gcc_sync.h" +# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(_MSC_VER) -# include "jemalloc/internal/atomic_msvc.h" +# include "jemalloc/internal/atomic_msvc.h" #elif defined(JEMALLOC_C11_ATOMICS) -# include "jemalloc/internal/atomic_c11.h" +# include "jemalloc/internal/atomic_c11.h" #else -# error "Don't have atomics implemented on this platform." +# error "Don't have atomics implemented on this platform." #endif #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE @@ -56,22 +56,19 @@ /* * Another convenience -- simple atomic helper functions. 
*/ -#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, \ - lg_size) \ - JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ - ATOMIC_INLINE void \ - atomic_load_add_store_##short_type(atomic_##short_type##_t *a, \ - type inc) { \ - type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ - type newval = oldval + inc; \ - atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ - } \ - ATOMIC_INLINE void \ - atomic_load_sub_store_##short_type(atomic_##short_type##_t *a, \ - type inc) { \ - type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ - type newval = oldval - inc; \ - atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ +#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, lg_size) \ + JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ + ATOMIC_INLINE void atomic_load_add_store_##short_type( \ + atomic_##short_type##_t *a, type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval + inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } \ + ATOMIC_INLINE void atomic_load_sub_store_##short_type( \ + atomic_##short_type##_t *a, type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval - inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ } /* @@ -79,7 +76,7 @@ * fact. */ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# define JEMALLOC_ATOMIC_U64 +# define JEMALLOC_ATOMIC_U64 #endif JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index a37e9661..1e86e2a0 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -66,35 +66,29 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ * Integral types have some special operations available that non-integral ones * lack. 
*/ -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_add_explicit(a, val, mo); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_sub_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_and_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_or_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_xor_explicit(a, val, mo); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + 
atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ + } #endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 0819fde1..a828a6b0 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -6,7 +6,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) {__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -39,95 +40,81 @@ atomic_fence(atomic_memory_order_t mo) { __atomic_thread_fence(atomic_enum_to_builtin(mo)); } -#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -typedef struct { \ - type repr; \ -} atomic_##short_type##_t; \ - \ -ATOMIC_INLINE type \ -atomic_load_##short_type(const atomic_##short_type##_t *a, \ - atomic_memory_order_t mo) { \ - type result; \ - __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ - return result; \ -} \ - \ -ATOMIC_INLINE void \ -atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - type result; \ - __atomic_exchange(&a->repr, &val, &result, \ - atomic_enum_to_builtin(mo)); \ - return result; \ -} \ - \ -ATOMIC_INLINE bool \ -atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - UNUSED type *expected, type desired, \ - atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - return __atomic_compare_exchange(&a->repr, expected, &desired, \ - true, atomic_enum_to_builtin(success_mo), \ - atomic_enum_to_builtin(failure_mo)); \ -} \ - \ -ATOMIC_INLINE bool \ 
-atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - UNUSED type *expected, type desired, \ - atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - return __atomic_compare_exchange(&a->repr, expected, &desired, \ - false, \ - atomic_enum_to_builtin(success_mo), \ - atomic_enum_to_builtin(failure_mo)); \ -} +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + typedef struct { \ + type repr; \ + } atomic_##short_type##_t; \ + \ + ATOMIC_INLINE type atomic_load_##short_type( \ + const atomic_##short_type##_t *a, atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ + } \ + \ + ATOMIC_INLINE void atomic_store_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_exchange_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange( \ + &a->repr, &val, &result, atomic_enum_to_builtin(mo)); \ + return result; \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type( \ + atomic_##short_type##_t *a, UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type( \ + atomic_##short_type##_t *a, UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ + } - -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused 
*/ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_add(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_sub(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_and(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_or(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_xor(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_add( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_sub( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_and( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + 
atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_or( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_xor( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 801d6197..9e2ff9c8 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -5,7 +5,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) {__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -29,13 +30,13 @@ atomic_fence(atomic_memory_order_t mo) { return; } asm volatile("" ::: "memory"); -# if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) /* This is implicit on x86. 
*/ -# elif defined(__ppc64__) +#elif defined(__ppc64__) asm volatile("lwsync"); -# elif defined(__ppc__) +#elif defined(__ppc__) asm volatile("sync"); -# elif defined(__sparc__) && defined(__arch64__) +#elif defined(__sparc__) && defined(__arch64__) if (mo == atomic_memory_order_acquire) { asm volatile("membar #LoadLoad | #LoadStore"); } else if (mo == atomic_memory_order_release) { @@ -43,9 +44,9 @@ atomic_fence(atomic_memory_order_t mo) { } else { asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); } -# else +#else __sync_synchronize(); -# endif +#endif asm volatile("" ::: "memory"); } @@ -68,23 +69,22 @@ atomic_fence(atomic_memory_order_t mo) { ATOMIC_INLINE void atomic_pre_sc_load_fence() { -# if defined(__i386__) || defined(__x86_64__) || \ - (defined(__sparc__) && defined(__arch64__)) +#if defined(__i386__) || defined(__x86_64__) \ + || (defined(__sparc__) && defined(__arch64__)) atomic_fence(atomic_memory_order_relaxed); -# else +#else atomic_fence(atomic_memory_order_seq_cst); -# endif +#endif } ATOMIC_INLINE void atomic_post_sc_store_fence() { -# if defined(__i386__) || defined(__x86_64__) || \ - (defined(__sparc__) && defined(__arch64__)) +#if defined(__i386__) || defined(__x86_64__) \ + || (defined(__sparc__) && defined(__arch64__)) atomic_fence(atomic_memory_order_seq_cst); -# else +#else atomic_fence(atomic_memory_order_relaxed); -# endif - +#endif } /* clang-format off */ @@ -164,39 +164,33 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ } /* clang-format on */ -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_add(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) 
{ \ - return __sync_fetch_and_sub(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_and(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_or(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_xor(&a->repr, val); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ + } #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index 5313aed9..7accca63 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -5,7 +5,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) 
{__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -15,108 +16,104 @@ typedef enum { atomic_memory_order_seq_cst } atomic_memory_order_t; -typedef char atomic_repr_0_t; -typedef short atomic_repr_1_t; -typedef long atomic_repr_2_t; +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; typedef __int64 atomic_repr_3_t; ATOMIC_INLINE void atomic_fence(atomic_memory_order_t mo) { _ReadWriteBarrier(); -# if defined(_M_ARM) || defined(_M_ARM64) +#if defined(_M_ARM) || defined(_M_ARM64) /* ARM needs a barrier for everything but relaxed. */ if (mo != atomic_memory_order_relaxed) { MemoryBarrier(); } -# elif defined(_M_IX86) || defined (_M_X64) +#elif defined(_M_IX86) || defined(_M_X64) /* x86 needs a barrier only for seq_cst. */ if (mo == atomic_memory_order_seq_cst) { MemoryBarrier(); } -# else -# error "Don't know how to create atomics for this platform for MSVC." -# endif +#else +# error "Don't know how to create atomics for this platform for MSVC." 
+#endif _ReadWriteBarrier(); } -#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_##lg_size##_t #define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) -#define ATOMIC_RAW_CONCAT(a, b) a ## b +#define ATOMIC_RAW_CONCAT(a, b) a##b -#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ - base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) \ + ATOMIC_CONCAT(base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) -#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ - ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) #define ATOMIC_INTERLOCKED_SUFFIX_0 8 #define ATOMIC_INTERLOCKED_SUFFIX_1 16 #define ATOMIC_INTERLOCKED_SUFFIX_2 #define ATOMIC_INTERLOCKED_SUFFIX_3 64 -#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ -typedef struct { \ - ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ -} atomic_##short_type##_t; \ - \ -ATOMIC_INLINE type \ -atomic_load_##short_type(const atomic_##short_type##_t *a, \ - atomic_memory_order_t mo) { \ - ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ - if (mo != atomic_memory_order_relaxed) { \ - atomic_fence(atomic_memory_order_acquire); \ - } \ - return (type) ret; \ -} \ - \ -ATOMIC_INLINE void \ -atomic_store_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - if (mo != atomic_memory_order_relaxed) { \ - atomic_fence(atomic_memory_order_release); \ - } \ - a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ - if (mo == atomic_memory_order_seq_cst) { \ - atomic_fence(atomic_memory_order_seq_cst); \ - } \ -} \ - \ -ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ - lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ -} \ - \ 
-ATOMIC_INLINE bool \ -atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - ATOMIC_INTERLOCKED_REPR(lg_size) e = \ - (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ - ATOMIC_INTERLOCKED_REPR(lg_size) d = \ - (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ - ATOMIC_INTERLOCKED_REPR(lg_size) old = \ - ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ - lg_size)(&a->repr, d, e); \ - if (old == e) { \ - return true; \ - } else { \ - *expected = (type)old; \ - return false; \ - } \ -} \ - \ -ATOMIC_INLINE bool \ -atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - /* We implement the weak version with strong semantics. */ \ - return atomic_compare_exchange_weak_##short_type(a, expected, \ - desired, success_mo, failure_mo); \ -} - +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ + } atomic_##short_type##_t; \ + \ + ATOMIC_INLINE type atomic_load_##short_type( \ + const atomic_##short_type##_t *a, atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type)ret; \ + } \ + \ + ATOMIC_INLINE void atomic_store_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size))val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ + } \ + \ + ATOMIC_INLINE type atomic_exchange_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return 
(type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type( \ + atomic_##short_type##_t *a, type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + e = (ATOMIC_INTERLOCKED_REPR(lg_size)) * expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + d = (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + old = ATOMIC_INTERLOCKED_NAME( \ + _InterlockedCompareExchange, lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type( \ + atomic_##short_type##_t *a, type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. 
*/ \ + return atomic_compare_exchange_weak_##short_type( \ + a, expected, desired, success_mo, failure_mo); \ + } /* clang-format off */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 0d34ee55..efc0aaa4 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -6,26 +6,26 @@ #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" -extern bool opt_background_thread; -extern size_t opt_max_background_threads; -extern malloc_mutex_t background_thread_lock; -extern atomic_b_t background_thread_enabled_state; -extern size_t n_background_threads; -extern size_t max_background_threads; +extern bool opt_background_thread; +extern size_t opt_max_background_threads; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); -bool background_thread_is_started(background_thread_info_t* info); -void background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep); +bool background_thread_is_started(background_thread_info_t *info); +void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); void background_thread_postfork_child(tsdn_t *tsdn); -bool background_thread_stats_read(tsdn_t *tsdn, - background_thread_stats_t *stats); +bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats); 
void background_thread_ctl_init(tsdn_t *tsdn); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index fd3884f1..e822a3f7 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -36,14 +36,14 @@ background_thread_info_get(size_t ind) { JEMALLOC_ALWAYS_INLINE uint64_t background_thread_wakeup_time_get(background_thread_info_t *info) { uint64_t next_wakeup = nstime_ns(&info->next_wakeup); - assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) == - (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); + assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) + == (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); return next_wakeup; } JEMALLOC_ALWAYS_INLINE void -background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info, - uint64_t wakeup_time) { +background_thread_wakeup_time_set( + tsdn_t *tsdn, background_thread_info_t *info, uint64_t wakeup_time) { malloc_mutex_assert_owner(tsdn, &info->mtx); atomic_store_b(&info->indefinite_sleep, wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE); diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 67b68797..d56673da 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -7,7 +7,7 @@ /* This file really combines "structs" and "types", but only transitionally. */ #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) -# define JEMALLOC_PTHREAD_CREATE_WRAPPER +# define JEMALLOC_PTHREAD_CREATE_WRAPPER #endif #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX @@ -35,33 +35,33 @@ typedef enum { struct background_thread_info_s { #ifdef JEMALLOC_BACKGROUND_THREAD /* Background thread is pthread specific. 
*/ - pthread_t thread; - pthread_cond_t cond; + pthread_t thread; + pthread_cond_t cond; #endif - malloc_mutex_t mtx; - background_thread_state_t state; + malloc_mutex_t mtx; + background_thread_state_t state; /* When true, it means no wakeup scheduled. */ - atomic_b_t indefinite_sleep; + atomic_b_t indefinite_sleep; /* Next scheduled wakeup time (absolute time in ns). */ - nstime_t next_wakeup; + nstime_t next_wakeup; /* * Since the last background thread run, newly added number of pages * that need to be purged by the next wakeup. This is adjusted on * epoch advance, and is used to determine whether we should signal the * background thread to wake up earlier. */ - size_t npages_to_purge_new; + size_t npages_to_purge_new; /* Stats: total number of runs since started. */ - uint64_t tot_n_runs; + uint64_t tot_n_runs; /* Stats: total sleep time since started. */ - nstime_t tot_sleep_time; + nstime_t tot_sleep_time; }; typedef struct background_thread_info_s background_thread_info_t; struct background_thread_stats_s { - size_t num_threads; - uint64_t num_runs; - nstime_t run_interval; + size_t num_threads; + uint64_t num_runs; + nstime_t run_interval; mutex_prof_data_t max_counter_per_bg_thd; }; typedef struct background_thread_stats_s background_thread_stats_t; diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index c8004b25..f71a874c 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -13,7 +13,7 @@ #define BASE_BLOCK_MIN_ALIGN ((size_t)2 << 20) enum metadata_thp_mode_e { - metadata_thp_disabled = 0, + metadata_thp_disabled = 0, /* * Lazily enable hugepage for metadata. To avoid high RSS caused by THP * + low usage arena (i.e. THP becomes a significant percentage), the @@ -22,15 +22,15 @@ enum metadata_thp_mode_e { * arena), "auto" behaves the same as "always", i.e. madvise hugepage * right away. 
*/ - metadata_thp_auto = 1, - metadata_thp_always = 2, + metadata_thp_auto = 1, + metadata_thp_always = 2, metadata_thp_mode_limit = 3 }; typedef enum metadata_thp_mode_e metadata_thp_mode_t; #define METADATA_THP_DEFAULT metadata_thp_disabled extern metadata_thp_mode_t opt_metadata_thp; -extern const char *const metadata_thp_mode_names[]; +extern const char *const metadata_thp_mode_names[]; /* Embedded at the beginning of every block of base-managed virtual memory. */ typedef struct base_block_s base_block_t; @@ -102,24 +102,24 @@ metadata_thp_enabled(void) { } base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, - const extent_hooks_t *extent_hooks, bool metadata_use_hooks); -void base_delete(tsdn_t *tsdn, base_t *base); -ehooks_t *base_ehooks_get(base_t *base); -ehooks_t *base_ehooks_get_for_metadata(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +base_t *base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, + bool metadata_use_hooks); +void base_delete(tsdn_t *tsdn, base_t *base); +ehooks_t *base_ehooks_get(base_t *base); +ehooks_t *base_ehooks_get_for_metadata(base_t *base); +extent_hooks_t *base_extent_hooks_set( + base_t *base, extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); -void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); -void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); -void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, - size_t *mapped, size_t *n_thp); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); 
-bool base_boot(tsdn_t *tsdn); +void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); +void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); +void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, + size_t *mapped, size_t *n_thp); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_BASE_H */ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h index 40c8b35f..3ceb8256 100644 --- a/include/jemalloc/internal/batcher.h +++ b/include/jemalloc/internal/batcher.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" -#define BATCHER_NO_IDX ((size_t)-1) +#define BATCHER_NO_IDX ((size_t) - 1) typedef struct batcher_s batcher_t; struct batcher_s { @@ -14,9 +14,9 @@ struct batcher_s { * togehter, along with the front of the mutex. The end of the mutex is * only touched if there's contention. */ - atomic_zu_t nelems; - size_t nelems_max; - size_t npushes; + atomic_zu_t nelems; + size_t nelems_max; + size_t npushes; malloc_mutex_t mtx; }; @@ -27,8 +27,8 @@ void batcher_init(batcher_t *batcher, size_t nelems_max); * BATCHER_NO_IDX if no index is free. If the former, the caller must call * batcher_push_end once done. 
*/ -size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, - size_t elems_to_push); +size_t batcher_push_begin( + tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push); void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); /* @@ -37,7 +37,7 @@ void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); */ size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index c49afea6..e91583d7 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -14,8 +14,8 @@ #ifdef JEMALLOC_JET extern void (*bin_batching_test_after_push_hook)(size_t idx); extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); -extern void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, - bool list_empty); +extern void (*bin_batching_test_after_unlock_hook)( + unsigned slab_dalloc_count, bool list_empty); #endif #ifdef JEMALLOC_JET @@ -50,8 +50,8 @@ bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { (void)list_empty; #ifdef JEMALLOC_JET if (bin_batching_test_after_unlock_hook != NULL) { - bin_batching_test_after_unlock_hook(slab_dalloc_count, - list_empty); + bin_batching_test_after_unlock_hook( + slab_dalloc_count, list_empty); } #endif } @@ -63,13 +63,13 @@ bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { typedef struct bin_s bin_t; struct bin_s { /* All operations on bin_t fields require lock ownership. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Bin statistics. These get touched every time the lock is acquired, * so put them close by in the hopes of getting some cache locality. 
*/ - bin_stats_t stats; + bin_stats_t stats; /* * Current slab being used to service allocations of this bin's size @@ -77,29 +77,29 @@ struct bin_s { * slabcur is reassigned, the previous slab must be deallocated or * inserted into slabs_{nonfull,full}. */ - edata_t *slabcur; + edata_t *slabcur; /* * Heap of non-full slabs. This heap is used to assure that new * allocations come from the non-full slab that is oldest/lowest in * memory. */ - edata_heap_t slabs_nonfull; + edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - edata_list_active_t slabs_full; + edata_list_active_t slabs_full; }; typedef struct bin_remote_free_data_s bin_remote_free_data_t; struct bin_remote_free_data_s { - void *ptr; + void *ptr; edata_t *slab; }; typedef struct bin_with_batch_s bin_with_batch_t; struct bin_with_batch_s { - bin_t bin; - batcher_t remote_frees; + bin_t bin; + batcher_t remote_frees; bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; }; diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 88d58c91..0022c3f7 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -26,22 +26,22 @@ typedef struct bin_info_s bin_info_t; struct bin_info_s { /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Total size of a slab for this bin's size class. */ - size_t slab_size; + size_t slab_size; /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* Number of sharded bins in each arena for this size class. */ - uint32_t n_shards; + uint32_t n_shards; /* * Metadata used to manipulate bitmaps for slabs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; }; /* The maximum size a size class can be and still get batching behavior. 
*/ @@ -51,7 +51,7 @@ extern size_t opt_bin_info_remote_free_max_batch; // The max number of pending elems (across all batches) extern size_t opt_bin_info_remote_free_max; -extern szind_t bin_info_nbatched_sizes; +extern szind_t bin_info_nbatched_sizes; extern unsigned bin_info_nbatched_bins; extern unsigned bin_info_nunbatched_bins; diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index 334c166d..e1095f38 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -12,52 +12,52 @@ struct bin_stats_s { * many times, resulting many increments to nrequests, but only one * each to nmalloc and ndalloc. */ - uint64_t nmalloc; - uint64_t ndalloc; + uint64_t nmalloc; + uint64_t ndalloc; /* * Number of allocation requests that correspond to the size of this * bin. This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - uint64_t nrequests; + uint64_t nrequests; /* * Current number of regions of this size class, including regions * currently cached by tcache. */ - size_t curregs; + size_t curregs; /* Number of tcache fills from this bin. */ - uint64_t nfills; + uint64_t nfills; /* Number of tcache flushes to this bin. */ - uint64_t nflushes; + uint64_t nflushes; /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; + uint64_t nslabs; /* * Total number of slabs reused by extracting them from the slabs heap * for this bin's size class. */ - uint64_t reslabs; + uint64_t reslabs; /* Current number of slabs in this bin. */ - size_t curslabs; + size_t curslabs; /* Current size of nonfull slabs heap in this bin. 
*/ - size_t nonfull_slabs; + size_t nonfull_slabs; - uint64_t batch_pops; - uint64_t batch_failed_pushes; - uint64_t batch_pushes; - uint64_t batch_pushed_elems; + uint64_t batch_pops; + uint64_t batch_failed_pushes; + uint64_t batch_pushes; + uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; struct bin_stats_data_s { - bin_stats_t stats_data; + bin_stats_t stats_data; mutex_prof_data_t mutex_data; }; #endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 5ec22dfd..b6bad37e 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -8,7 +8,10 @@ #define N_BIN_SHARDS_DEFAULT 1 /* Used in TSD static initializer only. Real init in arena_bind(). */ -#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}} +#define TSD_BINSHARDS_ZERO_INITIALIZER \ + { \ + { UINT8_MAX } \ + } typedef struct tsd_binshards_s tsd_binshards_t; struct tsd_binshards_s { diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 840dbde2..88c7942e 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -5,9 +5,9 @@ #include "jemalloc/internal/assert.h" /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ || !defined(JEMALLOC_INTERNAL_FFS) -# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif /* @@ -110,15 +110,17 @@ fls_u(unsigned x) { } #elif defined(_MSC_VER) -#if LG_SIZEOF_PTR == 3 -#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) -#else +# if LG_SIZEOF_PTR == 3 +# define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) +# else /* * This never actually runs; we're just dodging a compiler error for the * never-taken branch where sizeof(void *) == 8. 
*/ -#define DO_BSR64(bit, x) bit = 0; unreachable() -#endif +# define DO_BSR64(bit, x) \ + bit = 0; \ + unreachable() +# endif /* clang-format off */ #define DO_FLS(x) do { \ @@ -164,8 +166,8 @@ fls_u(unsigned x) { DO_FLS(x); } -#undef DO_FLS -#undef DO_BSR64 +# undef DO_FLS +# undef DO_BSR64 #else static inline unsigned @@ -185,7 +187,7 @@ fls_u(unsigned x) { #endif #if LG_SIZEOF_LONG_LONG > 3 -# error "Haven't implemented popcount for 16-byte ints." +# error "Haven't implemented popcount for 16-byte ints." #endif /* clang-format off */ @@ -284,7 +286,7 @@ popcount_llu(unsigned long long bitmap) { */ static inline size_t -cfs_lu(unsigned long* bitmap) { +cfs_lu(unsigned long *bitmap) { util_assume(*bitmap != 0); size_t bit = ffs_lu(*bitmap); *bitmap ^= ZU(1) << bit; @@ -300,7 +302,7 @@ ffs_zu(size_t x) { #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG return ffs_llu(x); #else -#error No implementation for size_t ffs() +# error No implementation for size_t ffs() #endif } @@ -313,11 +315,10 @@ fls_zu(size_t x) { #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG return fls_llu(x); #else -#error No implementation for size_t fls() +# error No implementation for size_t fls() #endif } - static inline unsigned ffs_u64(uint64_t x) { #if LG_SIZEOF_LONG == 3 @@ -325,7 +326,7 @@ ffs_u64(uint64_t x) { #elif LG_SIZEOF_LONG_LONG == 3 return ffs_llu(x); #else -#error No implementation for 64-bit ffs() +# error No implementation for 64-bit ffs() #endif } @@ -336,7 +337,7 @@ fls_u64(uint64_t x) { #elif LG_SIZEOF_LONG_LONG == 3 return fls_llu(x); #else -#error No implementation for 64-bit fls() +# error No implementation for 64-bit fls() #endif } @@ -345,7 +346,7 @@ ffs_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 return ffs_u(x); #else -#error No implementation for 32-bit ffs() +# error No implementation for 32-bit ffs() #endif } @@ -354,7 +355,7 @@ fls_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 return fls_u(x); #else -#error No implementation for 32-bit fls() +# error No implementation for 32-bit 
fls() #endif } @@ -375,7 +376,7 @@ pow2_ceil_u64(uint64_t x) { static inline uint32_t pow2_ceil_u32(uint32_t x) { if (unlikely(x <= 1)) { - return x; + return x; } size_t msb_on_index = fls_u32(x - 1); /* As above. */ @@ -413,13 +414,16 @@ lg_ceil(size_t x) { #define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) #define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2)) #define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4)) -#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) -#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) -#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) +#define LG_FLOOR_16(x) \ + (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) +#define LG_FLOOR_32(x) \ + (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) +#define LG_FLOOR_64(x) \ + (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) #if LG_SIZEOF_PTR == 2 -# define LG_FLOOR(x) LG_FLOOR_32((x)) +# define LG_FLOOR(x) LG_FLOOR_32((x)) #else -# define LG_FLOOR(x) LG_FLOOR_64((x)) +# define LG_FLOOR(x) LG_FLOOR_64((x)) #endif #define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1)) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 8cd5f5a3..e0f596fb 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -6,22 +6,22 @@ #include "jemalloc/internal/sc.h" typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. 
*/ -# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS +# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. */ -# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) +# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) #endif -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) /* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS - 1) /* * Do some analysis on how big the bitmap is before we use a tree. For a brute @@ -29,67 +29,64 @@ typedef unsigned long bitmap_t; * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE +# define BITMAP_USE_TREE #endif /* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. 
*/ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) +#define BITMAP_GROUPS_L0(nbits) BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) /* * Assuming the number of levels, number of groups required for a given number * of bits. 
*/ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) +#define BITMAP_GROUPS_1_LEVEL(nbits) BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ #ifdef BITMAP_USE_TREE -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif +# if LG_BITMAP_MAXBITS <= 
LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +# else +# error "Unsupported bitmap size" +# endif /* * Maximum number of levels possible. This could be statically computed based @@ -105,42 +102,53 @@ typedef unsigned long bitmap_t; * unused trailing entries in bitmap_info_t structures; the bitmaps themselves * are not impacted. */ -#define BITMAP_MAX_LEVELS 5 +# define BITMAP_MAX_LEVELS 5 -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. 
*/ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} +# define BITMAP_INFO_INITIALIZER(nbits) \ + { \ + /* nbits. */ \ + nbits, /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) \ + > BITMAP_GROUPS_L1(nbits)) \ + + (BITMAP_GROUPS_L1(nbits) \ + > BITMAP_GROUPS_L2(nbits)) \ + + (BITMAP_GROUPS_L2(nbits) \ + > BITMAP_GROUPS_L3(nbits)) \ + + (BITMAP_GROUPS_L3(nbits) \ + > BITMAP_GROUPS_L4(nbits)) \ + + 1, /* levels. */ \ + { \ + {0}, {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) \ + + BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + { \ + BITMAP_GROUPS_L4(nbits) \ + + BITMAP_GROUPS_L3(nbits) \ + + BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits) \ + } \ + } \ + } #else /* BITMAP_USE_TREE */ -#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) +# define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) +# define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* ngroups. */ \ - BITMAP_BITS2GROUPS(nbits) \ -} +# define BITMAP_INFO_INITIALIZER(nbits) \ + { \ + /* nbits. */ \ + nbits, /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ + } #endif /* BITMAP_USE_TREE */ @@ -161,21 +169,21 @@ typedef struct bitmap_info_s { * Only the first (nlevels+1) elements are used, and levels are ordered * bottom to top (e.g. 
the bottom level is stored in levels[0]). */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ + bitmap_level_t levels[BITMAP_MAX_LEVELS + 1]; +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; #endif /* BITMAP_USE_TREE */ } bitmap_info_t; -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); size_t bitmap_size(const bitmap_info_t *binfo); static inline bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); @@ -193,7 +201,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { static inline bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; + size_t goff; bitmap_t g; assert(bit < binfo->nbits); @@ -204,9 +212,9 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { static inline void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; + size_t goff; bitmap_t *gp; - bitmap_t g; + bitmap_t g; assert(bit < binfo->nbits); assert(!bitmap_get(bitmap, binfo, bit)); @@ -245,12 +253,13 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { #ifdef BITMAP_USE_TREE size_t bit = 0; for (unsigned level = binfo->nlevels; level--;) { - size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + - 1)); - bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit - >> lg_bits_per_group)]; - unsigned group_nmask = (unsigned)(((min_bit > bit) ? 
(min_bit - - bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS + * (level + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + + (bit >> lg_bits_per_group)]; + unsigned group_nmask = + (unsigned)(((min_bit > bit) ? (min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); assert(group_nmask <= BITMAP_GROUP_NBITS); bitmap_t group_mask = ~((1LU << group_nmask) - 1); bitmap_t group_masked = group & group_mask; @@ -273,16 +282,16 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } return bitmap_ffu(bitmap, binfo, sib_base); } - bit += ((size_t)ffs_lu(group_masked)) << - (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + bit += ((size_t)ffs_lu(group_masked)) + << (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); } assert(bit >= min_bit); assert(bit < binfo->nbits); return bit; #else - size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; - bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - - 1); + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] + & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); size_t bit; while (1) { if (g != 0) { @@ -302,7 +311,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { /* sfu: set first unset. 
*/ static inline size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { - size_t bit; + size_t bit; bitmap_t g; unsigned i; @@ -332,9 +341,9 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { static inline void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; - bitmap_t *gp; - bitmap_t g; + size_t goff; + bitmap_t *gp; + bitmap_t g; UNUSED bool propagate; assert(bit < binfo->nbits); diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index fa0ac99c..5ee9af4e 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -16,21 +16,21 @@ typedef struct { write_cb_t *write_cb; - void *cbopaque; - char *buf; - size_t buf_size; - size_t buf_end; - bool internal_buf; + void *cbopaque; + char *buf; + size_t buf_size; + size_t buf_end; + bool internal_buf; } buf_writer_t; -bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, - write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); -void buf_writer_flush(buf_writer_t *buf_writer); +bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, + write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); +void buf_writer_flush(buf_writer_t *buf_writer); write_cb_t buf_writer_cb; -void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); +void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); -typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit); -void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, - void *read_cbopaque); +typedef ssize_t(read_cb_t)(void *read_cbopaque, void *buf, size_t limit); +void buf_writer_pipe( + buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 7ab48dc9..08ee0d6a 100644 --- a/include/jemalloc/internal/cache_bin.h +++ 
b/include/jemalloc/internal/cache_bin.h @@ -45,8 +45,8 @@ extern const uintptr_t disabled_bin; * 1 << (sizeof(cache_bin_sz_t) * 8) * bytes spread across pointer sized objects to get the maximum. */ -#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \ - / sizeof(void *) - 1) +#define CACHE_BIN_NCACHED_MAX \ + (((size_t)1 << sizeof(cache_bin_sz_t) * 8) / sizeof(void *) - 1) /* * This lives inside the cache_bin (for locality reasons), and is initialized @@ -152,8 +152,8 @@ struct cache_bin_array_descriptor_s { }; static inline void -cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins) { +cache_bin_array_descriptor_init( + cache_bin_array_descriptor_t *descriptor, cache_bin_t *bins) { ql_elm_new(descriptor, link); descriptor->bins = bins; } @@ -222,7 +222,8 @@ cache_bin_ncached_max_get(cache_bin_t *bin) { * with later. */ static inline void -cache_bin_assert_earlier(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { +cache_bin_assert_earlier( + cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { if (earlier > later) { assert(bin->low_bits_full > bin->low_bits_empty); } @@ -285,8 +286,8 @@ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); - byte_t *empty_bits = (byte_t *)bin->stack_head + diff; - void **ret = (void **)empty_bits; + byte_t *empty_bits = (byte_t *)bin->stack_head + diff; + void **ret = (void **)empty_bits; assert(ret >= bin->stack_head); @@ -305,8 +306,8 @@ cache_bin_empty_position_get(cache_bin_t *bin) { */ static inline cache_bin_sz_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { - return (cache_bin_sz_t)bin->low_bits_empty - - cache_bin_ncached_max_get(bin) * sizeof(void *); + return (cache_bin_sz_t)bin->low_bits_empty + - cache_bin_ncached_max_get(bin) * sizeof(void *); } /* @@ -317,7 +318,7 @@ 
cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { static inline void ** cache_bin_low_bound_get(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - void **ret = cache_bin_empty_position_get(bin) - ncached_max; + void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); return ret; @@ -340,8 +341,8 @@ cache_bin_assert_empty(cache_bin_t *bin) { */ static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { - return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty) / sizeof(void *); + return cache_bin_diff(bin, bin->low_bits_low_water, bin->low_bits_empty) + / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. */ @@ -351,7 +352,8 @@ cache_bin_low_water_get(cache_bin_t *bin) { assert(low_water <= cache_bin_ncached_max_get(bin)); assert(low_water <= cache_bin_ncached_get_local(bin)); - cache_bin_assert_earlier(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, + cache_bin_assert_earlier(bin, + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); return low_water; @@ -390,9 +392,9 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
*/ - void *ret = *bin->stack_head; + void *ret = *bin->stack_head; cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; - void **new_head = bin->stack_head + 1; + void **new_head = bin->stack_head + 1; /* * Note that the low water mark is at most empty; if we pass this check, @@ -455,7 +457,8 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { JEMALLOC_ALWAYS_INLINE bool cache_bin_full(cache_bin_t *bin) { - return ((cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); + return ( + (cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } /* @@ -469,9 +472,9 @@ cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { } cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); - unsigned max_scan = opt_debug_double_free_max_scan < ncached - ? opt_debug_double_free_max_scan - : ncached; + unsigned max_scan = opt_debug_double_free_max_scan < ncached + ? opt_debug_double_free_max_scan + : ncached; void **cur = bin->stack_head; void **limit = cur + max_scan; @@ -516,9 +519,11 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } /* Stash at the full position, in the [full, head) range. */ - cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t) + bin->stack_head; /* Wraparound handled as well. 
*/ - cache_bin_sz_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + cache_bin_sz_t diff = cache_bin_diff( + bin, bin->low_bits_full, low_bits_head); *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -532,18 +537,21 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get( + bin); - cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, - bin->low_bits_full) / sizeof(void *); + cache_bin_sz_t n = cache_bin_diff( + bin, low_bits_low_bound, bin->low_bits_full) + / sizeof(void *); assert(n <= ncached_max); if (config_debug && n != 0) { /* Below are for assertions only. */ void **low_bound = cache_bin_low_bound_get(bin); - assert((cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); + assert( + (cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET /* Allow arbitrary pointers to be stashed in tests. */ aligned = true; @@ -582,16 +590,17 @@ cache_bin_nstashed_get_local(cache_bin_t *bin) { * they help access values that will not be concurrently modified. */ static inline void -cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, - cache_bin_sz_t *nstashed) { +cache_bin_nitems_get_remote( + cache_bin_t *bin, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. 
*/ - cache_bin_sz_t diff = bin->low_bits_empty - - (cache_bin_sz_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t diff = bin->low_bits_empty + - (cache_bin_sz_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ - cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get( + bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* @@ -616,7 +625,8 @@ struct cache_bin_fill_ctl_s { * This is to avoid stack overflow when we do batch edata look up, which * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. */ -#define CACHE_BIN_NFLUSH_BATCH_MAX ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) +#define CACHE_BIN_NFLUSH_BATCH_MAX \ + ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) /* * Filling and flushing are done in batch, on arrays of void *s. For filling, @@ -638,7 +648,7 @@ struct cache_bin_fill_ctl_s { typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { cache_bin_sz_t n; - void **ptr; + void **ptr; }; /* @@ -650,17 +660,17 @@ struct cache_bin_ptr_array_s { * representations is easy (since they'll require an alloca in the calling * frame). */ -#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ - cache_bin_ptr_array_t name; \ - name.n = (nval) +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ + cache_bin_ptr_array_t name; \ + name.n = (nval) /* * Start a fill. The bin must be empty, and This must be followed by a * finish_fill call before doing any alloc/dalloc operations on the bin. 
*/ static inline void -cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nfill) { +cache_bin_init_ptr_array_for_fill( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { cache_bin_assert_empty(bin); arr->ptr = cache_bin_empty_position_get(bin) - nfill; } @@ -671,8 +681,8 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, * case of OOM. */ static inline void -cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nfilled) { +cache_bin_finish_fill( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { cache_bin_assert_empty(bin); void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { @@ -687,19 +697,18 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, * everything we give them. */ static inline void -cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { +cache_bin_init_ptr_array_for_flush( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - nflush; - assert(cache_bin_ncached_get_local(bin) == 0 - || *arr->ptr != NULL); + assert(cache_bin_ncached_get_local(bin) == 0 || *arr->ptr != NULL); } static inline void -cache_bin_finish_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nflushed) { +cache_bin_finish_flush( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; - memmove(bin->stack_head + nflushed, bin->stack_head, - rem * sizeof(void *)); + memmove( + bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); } @@ -728,14 +737,14 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin) { * Initialize a cache_bin_info to represent up to the given number of items in * the cache_bins it is 
associated with. */ -void cache_bin_info_init(cache_bin_info_t *bin_info, - cache_bin_sz_t ncached_max); +void cache_bin_info_init( + cache_bin_info_t *bin_info, cache_bin_sz_t ncached_max); /* * Given an array of initialized cache_bin_info_ts, determine how big an * allocation is required to initialize a full set of cache_bin_ts. */ -void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, - szind_t ninfos, size_t *size, size_t *alignment); +void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment); /* * Actually initialize some cache bins. Callers should allocate the backing @@ -747,8 +756,8 @@ void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, void cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); void cache_bin_postincrement(void *alloc, size_t *cur_offset); -void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, - void *alloc, size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, + size_t *cur_offset); void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); bool cache_bin_stack_use_thp(void); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 8e9d7fed..01b27e8f 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -22,8 +22,8 @@ #define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) /* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); +typedef void ckh_hash_t(const void *, size_t[2]); +typedef bool ckh_keycomp_t(const void *, const void *); /* Hash table cell. */ typedef struct { @@ -56,7 +56,7 @@ typedef struct { unsigned lg_curbuckets; /* Hash and comparison functions. 
*/ - ckh_hash_t *hash; + ckh_hash_t *hash; ckh_keycomp_t *keycomp; /* Hash table with 2^lg_curbuckets buckets. */ @@ -89,8 +89,8 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); * the key and value, and doesn't do any lifetime management. */ bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data); +bool ckh_remove( + tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); /* Some useful hash and comparison functions for strings and pointers. */ diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 74e30701..0f38d40c 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -8,7 +8,7 @@ typedef struct counter_accum_s { LOCKEDINT_MTX_DECLARE(mtx) locked_u64_t accumbytes; - uint64_t interval; + uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1f124bfc..b290411b 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -13,7 +13,7 @@ #include "jemalloc/internal/stats.h" /* Maximum ctl tree depth. */ -#define CTL_MAX_DEPTH 7 +#define CTL_MAX_DEPTH 7 #define CTL_MULTI_SETTING_MAX_LEN 1000 typedef struct ctl_node_s { @@ -21,37 +21,37 @@ typedef struct ctl_node_s { } ctl_node_t; typedef struct ctl_named_node_s { - ctl_node_t node; + ctl_node_t node; const char *name; /* If (nchildren == 0), this is a terminal node. 
*/ - size_t nchildren; + size_t nchildren; const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, - size_t); + int (*ctl)( + tsd_t *, const size_t *, size_t, void *, size_t *, void *, size_t); } ctl_named_node_t; typedef struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); + const ctl_named_node_t *(*index)( + tsdn_t *, const size_t *, size_t, size_t); } ctl_indexed_node_t; typedef struct ctl_arena_stats_s { arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; + size_t allocated_small; uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; uint64_t nfills_small; uint64_t nflushes_small; - bin_stats_data_t bstats[SC_NBINS]; + bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - pac_estats_t estats[SC_NPSIZES]; - hpa_shard_stats_t hpastats; - sec_stats_t secstats; + pac_estats_t estats[SC_NPSIZES]; + hpa_shard_stats_t hpastats; + sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { @@ -72,17 +72,17 @@ typedef struct ctl_stats_s { typedef struct ctl_arena_s ctl_arena_t; struct ctl_arena_s { unsigned arena_ind; - bool initialized; + bool initialized; ql_elm(ctl_arena_t) destroyed_link; /* Basic stats, supported even if !config_stats. */ - unsigned nthreads; + unsigned nthreads; const char *dss; - ssize_t dirty_decay_ms; - ssize_t muzzy_decay_ms; - size_t pactive; - size_t pdirty; - size_t pmuzzy; + ssize_t dirty_decay_ms; + ssize_t muzzy_decay_ms; + size_t pactive; + size_t pdirty; + size_t pmuzzy; /* NULL if !config_stats. 
*/ ctl_arena_stats_t *astats; @@ -107,60 +107,67 @@ int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp); -int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_mibnametomib( + tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp); +int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); void ctl_postfork_child(tsdn_t *tsdn); void ctl_mtx_assert_held(tsdn_t *tsdn); -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) +#define xmallctl(name, oldp, oldlenp, newp, newlen) \ + do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ + } while (0) -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) +#define xmallctlnametomib(name, mibp, miblenp) \ + do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf( \ + ": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ + } while (0) -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do 
{ \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) \ + do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ + } while (0) -#define xmallctlmibnametomib(mib, miblen, name, miblenp) do { \ - if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ - != 0) { \ - malloc_write( \ - ": Failure in ctl_mibnametomib()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlmibnametomib(mib, miblen, name, miblenp) \ + do { \ + if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ + != 0) { \ + malloc_write( \ + ": Failure in ctl_mibnametomib()\n"); \ + abort(); \ + } \ + } while (0) -#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp, \ - newp, newlen) do { \ - if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ - oldp, oldlenp, newp, newlen) != 0) { \ - malloc_write( \ - ": Failure in ctl_bymibname()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlbymibname( \ + mib, miblen, name, miblenp, oldp, oldlenp, newp, newlen) \ + do { \ + if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ + oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write( \ + ": Failure in ctl_bymibname()\n"); \ + abort(); \ + } \ + } while (0) #endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 74be55da..e8773655 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/smoothstep.h" -#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) +#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t) - 1) /* * The decay_t computes the number of pages we should purge at any given time. 
@@ -168,12 +168,12 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* * Compute how many of 'npages_new' pages we would need to purge in 'time'. */ -uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, - size_t npages_new); +uint64_t decay_npages_purge_in( + decay_t *decay, nstime_t *time, size_t npages_new); /* Returns true if the epoch advanced and there are pages to purge. */ -bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, - size_t current_npages); +bool decay_maybe_advance_epoch( + decay_t *decay, nstime_t *new_time, size_t current_npages); /* * Calculates wait time until a number of pages in the interval @@ -182,7 +182,7 @@ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of * indefinite wait. */ -uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, - uint64_t npages_threshold); +uint64_t decay_ns_until_purge( + decay_t *decay, size_t npages_current, uint64_t npages_threshold); #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 2bd74fde..605733b5 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -9,8 +9,8 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; - eset_t eset; - eset_t guarded_eset; + eset_t eset; + eset_t guarded_eset; /* All stored extents must be in the same state. */ extent_state_t state; /* The index of the ehooks the ecache is associated with. */ @@ -24,22 +24,22 @@ struct ecache_s { static inline size_t ecache_npages_get(ecache_t *ecache) { - return eset_npages_get(&ecache->eset) + - eset_npages_get(&ecache->guarded_eset); + return eset_npages_get(&ecache->eset) + + eset_npages_get(&ecache->guarded_eset); } /* Get the number of extents in the given page size index. 
*/ static inline size_t ecache_nextents_get(ecache_t *ecache, pszind_t ind) { - return eset_nextents_get(&ecache->eset, ind) + - eset_nextents_get(&ecache->guarded_eset, ind); + return eset_nextents_get(&ecache->eset, ind) + + eset_nextents_get(&ecache->guarded_eset, ind); } /* Get the sum total bytes of the extents in the given page size index. */ static inline size_t ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { - return eset_nbytes_get(&ecache->eset, ind) + - eset_nbytes_get(&ecache->guarded_eset, ind); + return eset_nbytes_get(&ecache->eset, ind) + + eset_nbytes_get(&ecache->guarded_eset, ind); } static inline unsigned diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e41e4efa..2b229e7d 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -30,9 +30,9 @@ #define ESET_ENUMERATE_MAX_NUM 32 enum extent_state_e { - extent_state_active = 0, - extent_state_dirty = 1, - extent_state_muzzy = 2, + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, extent_state_retained = 3, extent_state_transition = 4, /* States below are intermediate. */ extent_state_merging = 5, @@ -42,7 +42,7 @@ typedef enum extent_state_e extent_state_t; enum extent_head_state_e { EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ + EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ }; typedef enum extent_head_state_e extent_head_state_t; @@ -50,25 +50,22 @@ typedef enum extent_head_state_e extent_head_state_t; * Which implementation of the page allocator interface, (PAI, defined in * pai.h) owns the given extent? */ -enum extent_pai_e { - EXTENT_PAI_PAC = 0, - EXTENT_PAI_HPA = 1 -}; +enum extent_pai_e { EXTENT_PAI_PAC = 0, EXTENT_PAI_HPA = 1 }; typedef enum extent_pai_e extent_pai_t; struct e_prof_info_s { /* Time when this was allocated. */ - nstime_t e_prof_alloc_time; + nstime_t e_prof_alloc_time; /* Allocation request size. 
*/ - size_t e_prof_alloc_size; + size_t e_prof_alloc_size; /* Points to a prof_tctx_t. */ - atomic_p_t e_prof_tctx; + atomic_p_t e_prof_tctx; /* * Points to a prof_recent_t for the allocation; NULL * means the recent allocation record no longer exists. * Protected by prof_recent_alloc_mtx. */ - atomic_p_t e_prof_recent_alloc; + atomic_p_t e_prof_recent_alloc; }; typedef struct e_prof_info_s e_prof_info_t; @@ -85,13 +82,13 @@ typedef struct e_prof_info_s e_prof_info_t; */ typedef struct edata_map_info_s edata_map_info_t; struct edata_map_info_s { - bool slab; + bool slab; szind_t szind; }; typedef struct edata_cmp_summary_s edata_cmp_summary_t; struct edata_cmp_summary_s { - uint64_t sn; + uint64_t sn; uintptr_t addr; }; @@ -149,55 +146,72 @@ struct edata_s { * * bin_shard: the shard of the bin from which this extent came. */ - uint64_t e_bits; -#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) \ + ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) \ + << (CURRENT_FIELD_SHIFT)) -#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EDATA_BITS_ARENA_SHIFT 0 -#define EDATA_BITS_ARENA_MASK MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EDATA_BITS_ARENA_SHIFT 0 +#define EDATA_BITS_ARENA_MASK \ + MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) -#define EDATA_BITS_SLAB_WIDTH 1 -#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) -#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_SLAB_WIDTH 1 +#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) -#define EDATA_BITS_COMMITTED_WIDTH 1 -#define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + 
EDATA_BITS_SLAB_SHIFT) -#define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_COMMITTED_WIDTH 1 +#define EDATA_BITS_COMMITTED_SHIFT \ + (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_COMMITTED_MASK \ + MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_PAI_WIDTH 1 -#define EDATA_BITS_PAI_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) +#define EDATA_BITS_PAI_WIDTH 1 +#define EDATA_BITS_PAI_SHIFT \ + (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) -#define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) -#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_ZEROED_WIDTH 1 +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) +#define EDATA_BITS_ZEROED_MASK \ + MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_GUARDED_WIDTH 1 -#define EDATA_BITS_GUARDED_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_GUARDED_MASK MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) +#define EDATA_BITS_GUARDED_WIDTH 1 +#define EDATA_BITS_GUARDED_SHIFT \ + (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_MASK \ + MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) -#define EDATA_BITS_STATE_WIDTH 3 -#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) -#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_STATE_WIDTH 3 +#define EDATA_BITS_STATE_SHIFT \ + (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) +#define EDATA_BITS_STATE_MASK \ + MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) 
-#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) -#define EDATA_BITS_SZIND_MASK MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_SZIND_MASK \ + MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) -#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) -#define EDATA_BITS_NFREE_MASK MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_NFREE_MASK \ + MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) -#define EDATA_BITS_BINSHARD_WIDTH 6 -#define EDATA_BITS_BINSHARD_SHIFT (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) -#define EDATA_BITS_BINSHARD_MASK MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_BINSHARD_WIDTH 6 +#define EDATA_BITS_BINSHARD_SHIFT \ + (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_BINSHARD_MASK \ + MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) #define EDATA_BITS_IS_HEAD_WIDTH 1 -#define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) -#define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_IS_HEAD_SHIFT \ + (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_IS_HEAD_MASK \ + MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; + void *e_addr; union { /* @@ -207,11 +221,11 @@ struct edata_s { * * ssssssss [...] 
ssssssss ssssnnnn nnnnnnnn */ - size_t e_size_esn; - #define EDATA_SIZE_MASK ((size_t)~(PAGE-1)) - #define EDATA_ESN_MASK ((size_t)PAGE-1) + size_t e_size_esn; +#define EDATA_SIZE_MASK ((size_t) ~(PAGE - 1)) +#define EDATA_ESN_MASK ((size_t)PAGE - 1) /* Base extent size, which may not be a multiple of PAGE. */ - size_t e_bsize; + size_t e_bsize; }; /* @@ -232,7 +246,7 @@ struct edata_s { * List linkage used when the edata_t is active; either in * arena's large allocations or bin_t's slabs_full. */ - ql_elm(edata_t) ql_link_active; + ql_elm(edata_t) ql_link_active; /* * Pairing heap linkage. Used whenever the extent is inactive * (in the page allocators), or when it is active and in @@ -240,7 +254,7 @@ struct edata_s { * extent and sitting in an edata_cache. */ union { - edata_heap_link_t heap_link; + edata_heap_link_t heap_link; edata_avail_link_t avail_link; }; }; @@ -253,10 +267,10 @@ struct edata_s { */ ql_elm(edata_t) ql_link_inactive; /* Small region slab metadata. */ - slab_data_t e_slab_data; + slab_data_t e_slab_data; /* Profiling data, used for large objects. 
*/ - e_prof_info_t e_prof_info; + e_prof_info_t e_prof_info; }; }; @@ -265,8 +279,8 @@ TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive) static inline unsigned edata_arena_ind_get(const edata_t *edata) { - unsigned arena_ind = (unsigned)((edata->e_bits & - EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT); + unsigned arena_ind = (unsigned)((edata->e_bits & EDATA_BITS_ARENA_MASK) + >> EDATA_BITS_ARENA_SHIFT); assert(arena_ind < MALLOCX_ARENA_LIMIT); return arena_ind; @@ -274,8 +288,8 @@ edata_arena_ind_get(const edata_t *edata) { static inline szind_t edata_szind_get_maybe_invalid(const edata_t *edata) { - szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >> - EDATA_BITS_SZIND_SHIFT); + szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) + >> EDATA_BITS_SZIND_SHIFT); assert(szind <= SC_NSIZES); return szind; } @@ -318,8 +332,8 @@ edata_usize_get(const edata_t *edata) { if (!sz_large_size_classes_disabled() || szind < SC_NBINS) { size_t usize_from_ind = sz_index2size(szind); - if (!sz_large_size_classes_disabled() && - usize_from_ind >= SC_LARGE_MINCLASS) { + if (!sz_large_size_classes_disabled() + && usize_from_ind >= SC_LARGE_MINCLASS) { size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); assert(size > sz_large_pad); size_t usize_from_size = size - sz_large_pad; @@ -341,8 +355,9 @@ edata_usize_get(const edata_t *edata) { static inline unsigned edata_binshard_get(const edata_t *edata) { - unsigned binshard = (unsigned)((edata->e_bits & - EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT); + unsigned binshard = (unsigned)((edata->e_bits + & EDATA_BITS_BINSHARD_MASK) + >> EDATA_BITS_BINSHARD_SHIFT); assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); return binshard; } @@ -354,58 +369,58 @@ edata_sn_get(const edata_t *edata) { static inline extent_state_t edata_state_get(const edata_t *edata) { - return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >> - EDATA_BITS_STATE_SHIFT); + return 
(extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) + >> EDATA_BITS_STATE_SHIFT); } static inline bool edata_guarded_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >> - EDATA_BITS_GUARDED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) + >> EDATA_BITS_GUARDED_SHIFT); } static inline bool edata_zeroed_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> - EDATA_BITS_ZEROED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) + >> EDATA_BITS_ZEROED_SHIFT); } static inline bool edata_committed_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >> - EDATA_BITS_COMMITTED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) + >> EDATA_BITS_COMMITTED_SHIFT); } static inline extent_pai_t edata_pai_get(const edata_t *edata) { - return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >> - EDATA_BITS_PAI_SHIFT); + return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) + >> EDATA_BITS_PAI_SHIFT); } static inline bool edata_slab_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> - EDATA_BITS_SLAB_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) + >> EDATA_BITS_SLAB_SHIFT); } static inline unsigned edata_nfree_get(const edata_t *edata) { assert(edata_slab_get(edata)); - return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >> - EDATA_BITS_NFREE_SHIFT); + return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) + >> EDATA_BITS_NFREE_SHIFT); } static inline void * edata_base_get(const edata_t *edata) { - assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || - !edata_slab_get(edata)); + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) + || !edata_slab_get(edata)); return PAGE_ADDR2BASE(edata->e_addr); } static inline void * edata_addr_get(const edata_t *edata) { - assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || - !edata_slab_get(edata)); + 
assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) + || !edata_slab_get(edata)); return edata->e_addr; } @@ -437,14 +452,14 @@ edata_before_get(const edata_t *edata) { static inline void * edata_last_get(const edata_t *edata) { - return (void *)((byte_t *)edata_base_get(edata) + - edata_size_get(edata) - PAGE); + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata) + - PAGE); } static inline void * edata_past_get(const edata_t *edata) { - return (void *)((byte_t *)edata_base_get(edata) + - edata_size_get(edata)); + return ( + void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata)); } static inline slab_data_t * @@ -461,8 +476,8 @@ edata_slab_data_get_const(const edata_t *edata) { static inline prof_tctx_t * edata_prof_tctx_get(const edata_t *edata) { - return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx, - ATOMIC_ACQUIRE); + return (prof_tctx_t *)atomic_load_p( + &edata->e_prof_info.e_prof_tctx, ATOMIC_ACQUIRE); } static inline const nstime_t * @@ -483,16 +498,16 @@ edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { static inline void edata_arena_ind_set(edata_t *edata, unsigned arena_ind) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) + | ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); } static inline void edata_binshard_set(edata_t *edata, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); - edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) + | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); } static inline void @@ -508,8 +523,8 @@ edata_size_set(edata_t *edata, size_t size) { static inline void edata_esn_set(edata_t *edata, size_t esn) { - edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn & - EDATA_ESN_MASK); + edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) + | (esn & EDATA_ESN_MASK); } static inline void @@ -526,25 +541,26 @@ edata_ps_set(edata_t *edata, hpdata_t *ps) { static inline void edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) | - ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) + | ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); } static inline void edata_nfree_set(edata_t *edata, unsigned nfree) { assert(edata_slab_get(edata)); - edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) | - ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) + | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); - edata->e_bits = (edata->e_bits & - (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); + edata->e_bits = (edata->e_bits + & (~EDATA_BITS_NFREE_MASK + & ~EDATA_BITS_BINSHARD_MASK)) + | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) + | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void @@ -572,38 +588,38 @@ edata_sn_set(edata_t *edata, uint64_t sn) { static inline void edata_state_set(edata_t *edata, extent_state_t state) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) | - ((uint64_t)state << EDATA_BITS_STATE_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) + | ((uint64_t)state << EDATA_BITS_STATE_SHIFT); } static inline void edata_guarded_set(edata_t *edata, bool guarded) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) | - ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) + | ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); } static inline void edata_zeroed_set(edata_t *edata, bool zeroed) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) + | ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); } static inline void edata_committed_set(edata_t *edata, bool committed) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) + | ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); } static inline void edata_pai_set(edata_t *edata, extent_pai_t pai) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) | - ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) + | ((uint64_t)pai << 
EDATA_BITS_PAI_SHIFT); } static inline void edata_slab_set(edata_t *edata, bool slab) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | - ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) + | ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); } static inline void @@ -622,22 +638,22 @@ edata_prof_alloc_size_set(edata_t *edata, size_t size) { } static inline void -edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_set_dont_call_directly( + edata_t *edata, prof_recent_t *recent_alloc) { atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc, ATOMIC_RELAXED); } static inline bool edata_is_head_get(edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> - EDATA_BITS_IS_HEAD_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) + >> EDATA_BITS_IS_HEAD_SHIFT); } static inline void edata_is_head_set(edata_t *edata, bool is_head) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) + | ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } static inline bool @@ -676,8 +692,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn, - bool reused) { +edata_binit( + edata_t *edata, void *addr, size_t bsize, uint64_t sn, bool reused) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -729,11 +745,13 @@ edata_cmp_summary_encode(edata_cmp_summary_t src) { static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { - unsigned __int128 a_encoded = edata_cmp_summary_encode(a); - unsigned __int128 b_encoded = edata_cmp_summary_encode(b); - if (a_encoded < b_encoded) return -1; - if 
(a_encoded == b_encoded) return 0; - return 1; + unsigned __int128 a_encoded = edata_cmp_summary_encode(a); + unsigned __int128 b_encoded = edata_cmp_summary_encode(b); + if (a_encoded < b_encoded) + return -1; + if (a_encoded == b_encoded) + return 0; + return 1; } #else static inline int @@ -750,8 +768,8 @@ edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { * prediction accuracy is not great. As a result, this implementation * is measurably faster (by around 30%). */ - return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + - ((a.addr > b.addr) - (a.addr < b.addr)); + return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + + ((a.addr > b.addr) - (a.addr < b.addr)); } #endif @@ -772,7 +790,6 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b); } -ph_proto(, edata_avail, edata_t) -ph_proto(, edata_heap, edata_t) +ph_proto(, edata_avail, edata_t) ph_proto(, edata_heap, edata_t) #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index b2c7b4f1..d92d90cb 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -15,13 +15,13 @@ typedef struct edata_cache_s edata_cache_t; struct edata_cache_s { - edata_avail_t avail; - atomic_zu_t count; + edata_avail_t avail; + atomic_zu_t count; malloc_mutex_t mtx; - base_t *base; + base_t *base; }; -bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); +bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); @@ -37,14 +37,14 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); typedef struct edata_cache_fast_s edata_cache_fast_t; struct edata_cache_fast_s { edata_list_inactive_t list; - edata_cache_t *fallback; - bool disabled; + edata_cache_t *fallback; 
+ bool disabled; }; void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback); edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs); -void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, - edata_t *edata); +void edata_cache_fast_put( + tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata); void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 947e056c..c65e189a 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -46,10 +46,10 @@ extern const extent_hooks_t ehooks_default_extent_hooks; */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -bool ehooks_default_dalloc_impl(void *addr, size_t size); -void ehooks_default_destroy_impl(void *addr, size_t size); -bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_dalloc_impl(void *addr, size_t size); +void ehooks_default_destroy_impl(void *addr, size_t size); +bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); #endif @@ -116,8 +116,8 @@ ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { static inline bool ehooks_are_default(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks) == - &ehooks_default_extent_hooks; + return ehooks_get_extent_hooks_ptr(ehooks) + == &ehooks_default_extent_hooks; } /* @@ -189,16 +189,15 @@ ehooks_debug_zero_check(void *addr, size_t size) { } } - static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, 
size_t alignment, bool *zero, bool *commit) { - bool orig_zero = *zero; - void *ret; + bool orig_zero = *zero; + void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - ret = ehooks_default_alloc_impl(tsdn, new_addr, size, - alignment, zero, commit, ehooks_ind_get(ehooks)); + ret = ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, + zero, commit, ehooks_ind_get(ehooks)); } else { ehooks_pre_reentrancy(tsdn); ret = extent_hooks->alloc(extent_hooks, new_addr, size, @@ -214,8 +213,8 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, } static inline bool -ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed) { +ehooks_dalloc( + tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); @@ -231,8 +230,8 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline void -ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed) { +ehooks_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ehooks_default_destroy_impl(addr, size); @@ -250,15 +249,15 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - bool err; + bool err; if (extent_hooks == &ehooks_default_extent_hooks) { err = ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { err = true; } else { ehooks_pre_reentrancy(tsdn); - err = extent_hooks->commit(extent_hooks, addr, size, - offset, length, 
ehooks_ind_get(ehooks)); + err = extent_hooks->commit(extent_hooks, addr, size, offset, + length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } if (!err) { @@ -384,7 +383,7 @@ ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { static inline bool ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { - bool err; + bool err; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { @@ -399,7 +398,7 @@ ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { static inline bool ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { - bool err; + bool err; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index fba46abe..88692356 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -10,9 +10,9 @@ * EMAP_DECLARE_RTREE_CTX; * in uses will avoid empty-statement warnings. */ -#define EMAP_DECLARE_RTREE_CTX \ - rtree_ctx_t rtree_ctx_fallback; \ - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) typedef struct emap_s emap_t; struct emap_s { @@ -22,25 +22,25 @@ struct emap_s { /* Used to pass rtree lookup context down the path. 
*/ typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; struct emap_alloc_ctx_s { - size_t usize; + size_t usize; szind_t szind; - bool slab; + bool slab; }; typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t; struct emap_full_alloc_ctx_s { - szind_t szind; - bool slab; + szind_t szind; + bool slab; edata_t *edata; }; bool emap_init(emap_t *emap, base_t *base, bool zeroed); -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab); +void emap_remap( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); -void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state); +void emap_update_edata_state( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state); /* * The two acquire functions below allow accessing neighbor edatas, if it's safe @@ -62,16 +62,16 @@ edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, bool forward); edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state); -void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state); +void emap_release_edata( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state); /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. * Returns true on error (i.e. resource exhaustion). */ -bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind, bool slab); +bool emap_register_boundary( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); /* * Does the same thing, but with the interior of the range, for slab @@ -92,8 +92,8 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, * touched, so no allocation is necessary to fill the interior once the boundary * has been touched. 
*/ -void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind); +void emap_register_interior( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind); void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata); void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata); @@ -161,8 +161,8 @@ emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { emap_assert_mapped(tsdn, emap, edata); EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)); return edata_state_in_transition(contents.metadata.state); } @@ -194,9 +194,9 @@ emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ false); - if (contents.edata == NULL || - contents.metadata.state == extent_state_active || - edata_state_in_transition(contents.metadata.state)) { + if (contents.edata == NULL + || contents.metadata.state == extent_state_active + || edata_state_in_transition(contents.metadata.state)) { return true; } @@ -211,8 +211,8 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); assert(!edata_guarded_get(inner) && !edata_guarded_get(outer)); - assert(edata_base_get(inner) == edata_past_get(outer) || - edata_base_get(outer) == edata_past_get(inner)); + assert(edata_base_get(inner) == edata_past_get(outer) + || edata_base_get(outer) == edata_past_get(inner)); } JEMALLOC_ALWAYS_INLINE void @@ -232,13 +232,13 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, - size_t usize) { 
+emap_alloc_ctx_init( + emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, size_t usize) { alloc_ctx->szind = szind; alloc_ctx->slab = slab; alloc_ctx->usize = usize; - assert(sz_large_size_classes_disabled() || - usize == sz_index2size(szind)); + assert( + sz_large_size_classes_disabled() || usize == sz_index2size(szind)); } JEMALLOC_ALWAYS_INLINE size_t @@ -248,27 +248,29 @@ emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } - assert(sz_large_size_classes_disabled() || - alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + assert(sz_large_size_classes_disabled() + || alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); return alloc_ctx->usize; } /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void -emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_lookup( + tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); /* * If the alloc is invalid, do not calculate usize since edata * could be corrupted. */ emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, (contents.metadata.szind == SC_NSIZES - || contents.edata == NULL)? 0: edata_usize_get(contents.edata)); + contents.metadata.slab, + (contents.metadata.szind == SC_NSIZES || contents.edata == NULL) + ? 0 + : edata_usize_get(contents.edata)); } /* The pointer must be mapped. 
*/ @@ -277,8 +279,8 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); full_alloc_ctx->edata = contents.edata; full_alloc_ctx->szind = contents.metadata.szind; full_alloc_ctx->slab = contents.metadata.slab; @@ -295,8 +297,8 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents; - bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, &contents); + bool err = rtree_read_independent( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, &contents); if (err) { return true; } @@ -311,14 +313,14 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * fast path, e.g. when the metadata key is not cached. */ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_try_lookup_fast( + tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { /* Use the unsafe getter since this may gets called during exit. */ rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd); rtree_metadata_t metadata; - bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, - rtree_ctx, (uintptr_t)ptr, &metadata); + bool err = rtree_metadata_try_read_fast( + tsd_tsdn(tsd), &emap->rtree, rtree_ctx, (uintptr_t)ptr, &metadata); if (err) { return true; } @@ -345,11 +347,12 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); * This allows size-checking assertions, which we can only do while we're in the * process of edata lookups. 
*/ -typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); +typedef void (*emap_metadata_visitor)( + void *ctx, emap_full_alloc_ctx_t *alloc_ctx); typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t; union emap_batch_lookup_result_u { - edata_t *edata; + edata_t *edata; rtree_leaf_elm_t *rtree_leaf; }; @@ -375,8 +378,8 @@ emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, for (size_t i = 0; i < nptrs; i++) { rtree_leaf_elm_t *elm = result[i].rtree_leaf; - rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd), - &emap->rtree, elm, /* dependent */ true); + rtree_contents_t contents = rtree_leaf_elm_read( + tsd_tsdn(tsd), &emap->rtree, elm, /* dependent */ true); result[i].edata = contents.edata; emap_full_alloc_ctx_t alloc_ctx; /* diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 11153254..a4073e6a 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -44,18 +44,18 @@ typedef struct emitter_col_s emitter_col_t; struct emitter_col_s { /* Filled in by the user. */ emitter_justify_t justify; - int width; - emitter_type_t type; + int width; + emitter_type_t type; union { - bool bool_val; - int int_val; - unsigned unsigned_val; - uint32_t uint32_val; - uint32_t uint32_t_val; - uint64_t uint64_val; - uint64_t uint64_t_val; - size_t size_val; - ssize_t ssize_val; + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint32_t uint32_t_val; + uint64_t uint64_val; + uint64_t uint64_t_val; + size_t size_val; + ssize_t ssize_val; const char *str_val; }; @@ -73,8 +73,8 @@ struct emitter_s { emitter_output_t output; /* The output information. */ write_cb_t *write_cb; - void *cbopaque; - int nesting_depth; + void *cbopaque; + int nesting_depth; /* True if we've already emitted a value at the given depth. */ bool item_at_depth; /* True if we emitted a key and will emit corresponding value next. 
*/ @@ -83,8 +83,8 @@ struct emitter_s { static inline bool emitter_outputs_json(emitter_t *emitter) { - return emitter->output == emitter_output_json || - emitter->output == emitter_output_json_compact; + return emitter->output == emitter_output_json + || emitter->output == emitter_output_json_compact; } /* Internal convenience function. Write to the emitter the given string. */ @@ -98,23 +98,23 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -static inline const char * JEMALLOC_FORMAT_ARG(3) -emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, - emitter_justify_t justify, int width) { +static inline const char * +JEMALLOC_FORMAT_ARG(3) emitter_gen_fmt(char *out_fmt, size_t out_size, + const char *fmt_specifier, emitter_justify_t justify, int width) { size_t written; fmt_specifier++; if (justify == emitter_justify_none) { - written = malloc_snprintf(out_fmt, out_size, - "%%%s", fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%%s", fmt_specifier); } else if (justify == emitter_justify_left) { - written = malloc_snprintf(out_fmt, out_size, - "%%-%d%s", width, fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%-%d%s", width, fmt_specifier); } else { - written = malloc_snprintf(out_fmt, out_size, - "%%%d%s", width, fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%%d%s", width, fmt_specifier); } /* Only happens in case of bad format string, which *we* choose. 
*/ - assert(written < out_size); + assert(written < out_size); return out_fmt; } @@ -122,10 +122,10 @@ static inline void emitter_emit_str(emitter_t *emitter, emitter_justify_t justify, int width, char *fmt, size_t fmt_size, const char *str) { #define BUF_SIZE 256 - char buf[BUF_SIZE]; + char buf[BUF_SIZE]; size_t str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", str); - emitter_printf(emitter, - emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + emitter_printf( + emitter, emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); if (str_written < BUF_SIZE) { return; } @@ -168,16 +168,16 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, */ char fmt[FMT_SIZE]; -#define EMIT_SIMPLE(type, format) \ - emitter_printf(emitter, \ - emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ +#define EMIT_SIMPLE(type, format) \ + emitter_printf(emitter, \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ *(const type *)value); switch (value_type) { case emitter_type_bool: emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), - *(const bool *)value ? "true" : "false"); + *(const bool *)value ? "true" : "false"); break; case emitter_type_int: EMIT_SIMPLE(int, "%d") @@ -213,7 +213,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, #undef FMT_SIZE } - /* Internal functions. In json mode, tracks nesting state. 
*/ static inline void emitter_nest_inc(emitter_t *emitter) { @@ -229,7 +228,7 @@ emitter_nest_dec(emitter_t *emitter) { static inline void emitter_indent(emitter_t *emitter) { - int amount = emitter->nesting_depth; + int amount = emitter->nesting_depth; const char *indent_str; assert(emitter->output != emitter_output_json_compact); if (emitter->output == emitter_output_json) { @@ -291,12 +290,12 @@ emitter_json_key(emitter_t *emitter, const char *json_key) { } static inline void -emitter_json_value(emitter_t *emitter, emitter_type_t value_type, - const void *value) { +emitter_json_value( + emitter_t *emitter, emitter_type_t value_type, const void *value) { if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); + emitter_print_value( + emitter, emitter_justify_none, -1, value_type, value); emitter->item_at_depth = true; } } @@ -367,7 +366,6 @@ emitter_json_object_end(emitter_t *emitter) { } } - /******************************************************************************/ /* Table public API. 
*/ @@ -389,14 +387,13 @@ emitter_table_dict_end(emitter_t *emitter) { static inline void emitter_table_kv_note(emitter_t *emitter, const char *table_key, - emitter_type_t value_type, const void *value, - const char *table_note_key, emitter_type_t table_note_value_type, - const void *table_note_value) { + emitter_type_t value_type, const void *value, const char *table_note_key, + emitter_type_t table_note_value_type, const void *table_note_value) { if (emitter->output == emitter_output_table) { emitter_indent(emitter); emitter_printf(emitter, "%s: ", table_key); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); + emitter_print_value( + emitter, emitter_justify_none, -1, value_type, value); if (table_note_key != NULL) { emitter_printf(emitter, " (%s: ", table_note_key); emitter_print_value(emitter, emitter_justify_none, -1, @@ -415,7 +412,6 @@ emitter_table_kv(emitter_t *emitter, const char *table_key, emitter_type_bool, NULL); } - /* Write to the emitter the given string, but only in table mode. */ JEMALLOC_FORMAT_PRINTF(2, 3) static inline void @@ -423,7 +419,8 @@ emitter_table_printf(emitter_t *emitter, const char *format, ...) { if (emitter->output == emitter_output_table) { va_list ap; va_start(ap, format); - malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + malloc_vcprintf( + emitter->write_cb, emitter->cbopaque, format, ap); va_end(ap); } } @@ -434,7 +431,7 @@ emitter_table_row(emitter_t *emitter, emitter_row_t *row) { return; } emitter_col_t *col; - ql_foreach(col, &row->cols, link) { + ql_foreach (col, &row->cols, link) { emitter_print_value(emitter, col->justify, col->width, col->type, (const void *)&col->bool_val); } @@ -452,7 +449,6 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) { ql_tail_insert(&row->cols, col, link); } - /******************************************************************************/ /* * Generalized public API. 
Emits using either JSON or table, according to @@ -464,9 +460,8 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) { */ static inline void emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, - emitter_type_t value_type, const void *value, - const char *table_note_key, emitter_type_t table_note_value_type, - const void *table_note_value) { + emitter_type_t value_type, const void *value, const char *table_note_key, + emitter_type_t table_note_value_type, const void *table_note_value) { if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_value(emitter, value_type, value); @@ -485,8 +480,8 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, } static inline void -emitter_dict_begin(emitter_t *emitter, const char *json_key, - const char *table_header) { +emitter_dict_begin( + emitter_t *emitter, const char *json_key, const char *table_header) { if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_object_begin(emitter); @@ -526,8 +521,9 @@ emitter_end(emitter_t *emitter) { if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 1); emitter_nest_dec(emitter); - emitter_printf(emitter, "%s", emitter->output == - emitter_output_json_compact ? "}" : "\n}\n"); + emitter_printf(emitter, "%s", + emitter->output == emitter_output_json_compact ? "}" + : "\n}\n"); } } diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h index 40a1add0..8206ba85 100644 --- a/include/jemalloc/internal/exp_grow.h +++ b/include/jemalloc/internal/exp_grow.h @@ -27,8 +27,7 @@ exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min, *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); while (*r_alloc_size < alloc_size_min) { (*r_skip)++; - if (exp_grow->next + *r_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { + if (exp_grow->next + *r_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. 
*/ return true; } @@ -44,7 +43,6 @@ exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) { } else { exp_grow->next = exp_grow->limit; } - } void exp_grow_init(exp_grow_t *exp_grow); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index be61db8d..e81dff2c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -26,9 +26,10 @@ extern size_t opt_process_madvise_max_batch; #ifdef JEMALLOC_HAVE_PROCESS_MADVISE /* The iovec is on stack. Limit the max batch to avoid stack overflow. */ -#define PROCESS_MADVISE_MAX_BATCH_LIMIT (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) +# define PROCESS_MADVISE_MAX_BATCH_LIMIT \ + (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) #else -#define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 +# define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 #endif edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, @@ -37,44 +38,43 @@ edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool guarded); -void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata); +void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata); void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); +void extent_dalloc_gap( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, bool growing_retained); -void 
extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); -void extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); +void extent_dalloc_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_dalloc_wrapper_purged( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_destroy_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, - bool holding_core_locks); -bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b); -bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - bool commit, bool zero, bool growing_retained); +edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); +bool extent_merge_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); +bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained); size_t extent_sn_next(pac_t *pac); -bool extent_boot(void); +bool extent_boot(void); JEMALLOC_ALWAYS_INLINE bool -extent_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { +extent_neighbor_head_state_mergeable( + bool edata_is_head, bool neighbor_is_head, bool forward) { /* * Head states checking: disallow merging if the higher 
addr extent is a * head extent. This helps preserve first-fit, and more importantly @@ -102,8 +102,8 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, } /* It's not safe to access *neighbor yet; must verify states first. */ bool neighbor_is_head = contents.metadata.is_head; - if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), - neighbor_is_head, forward)) { + if (!extent_neighbor_head_state_mergeable( + edata_is_head_get(edata), neighbor_is_head, forward)) { return false; } extent_state_t neighbor_state = contents.metadata.state; @@ -112,8 +112,9 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } /* From this point, it's safe to access *neighbor. */ - if (!expanding && (edata_committed_get(edata) != - edata_committed_get(neighbor))) { + if (!expanding + && (edata_committed_get(edata) + != edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit * commit step (and writing to uncommitted memory is not @@ -133,11 +134,11 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } if (opt_retain) { - assert(edata_arena_ind_get(edata) == - edata_arena_ind_get(neighbor)); + assert(edata_arena_ind_get(edata) + == edata_arena_ind_get(neighbor)); } else { - if (edata_arena_ind_get(edata) != - edata_arena_ind_get(neighbor)) { + if (edata_arena_ind_get(edata) + != edata_arena_ind_get(neighbor)) { return false; } } diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index c8e71e82..4bb3f51d 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -6,11 +6,11 @@ #include "jemalloc/internal/tsd_types.h" typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, + dss_prec_disabled = 0, + dss_prec_primary = 1, dss_prec_secondary = 2, - dss_prec_limit = 3 + dss_prec_limit = 3 } dss_prec_t; #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT 
"secondary" @@ -20,11 +20,11 @@ extern const char *const dss_prec_names[]; extern const char *opt_dss; dss_prec_t extent_dss_prec_get(void); -bool extent_dss_prec_set(dss_prec_t dss_prec); -void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(void *addr); -bool extent_dss_mergeable(void *addr_a, void *addr_b); -void extent_dss_boot(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); #endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */ diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h index e6a4649e..aa469896 100644 --- a/include/jemalloc/internal/extent_mmap.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -5,8 +5,8 @@ extern bool opt_retain; -void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); +void *extent_alloc_mmap( + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); bool extent_dalloc_mmap(void *addr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h index e38095af..bf76f362 100644 --- a/include/jemalloc/internal/fb.h +++ b/include/jemalloc/internal/fb.h @@ -15,8 +15,8 @@ typedef unsigned long fb_group_t; #define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) -#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ - + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) +#define FB_NGROUPS(nbits) \ + ((nbits) / FB_GROUP_BITS + ((nbits) % FB_GROUP_BITS == 0 ? 
0 : 1)) static inline void fb_init(fb_group_t *fb, size_t nbits) { @@ -75,7 +75,6 @@ fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { fb[group_ind] &= ~((fb_group_t)1 << bit_ind); } - /* * Some implementation details. This visitation function lets us apply a group * visitor to each group in the bitmap (potentially modifying it). The mask @@ -94,7 +93,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, * to from bit 0. */ size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS - ? FB_GROUP_BITS - start_bit_ind : cnt); + ? FB_GROUP_BITS - start_bit_ind + : cnt); /* * We can basically split affected words into: * - The first group, where we touch only the high bits @@ -104,8 +104,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, * this can lead to bad codegen for those middle words. */ /* First group */ - fb_group_t mask = ((~(fb_group_t)0) - >> (FB_GROUP_BITS - first_group_cnt)) + fb_group_t mask = + ((~(fb_group_t)0) >> (FB_GROUP_BITS - first_group_cnt)) << start_bit_ind; visit(ctx, &fb[group_ind], mask); @@ -176,12 +176,12 @@ fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { * Returns the number of bits in the bitmap if no such bit exists. */ JEMALLOC_ALWAYS_INLINE ssize_t -fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, - bool forward) { +fb_find_impl( + fb_group_t *fb, size_t nbits, size_t start, bool val, bool forward) { assert(start < nbits); - size_t ngroups = FB_NGROUPS(nbits); + size_t ngroups = FB_NGROUPS(nbits); ssize_t group_ind = start / FB_GROUP_BITS; - size_t bit_ind = start % FB_GROUP_BITS; + size_t bit_ind = start % FB_GROUP_BITS; fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); @@ -265,8 +265,8 @@ fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, return false; } /* Half open range; the set bits are [begin, end). 
*/ - ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, - forward); + ssize_t next_range_end = fb_find_impl( + fb, nbits, next_range_begin, !val, forward); if (forward) { *r_begin = next_range_begin; *r_len = next_range_end - next_range_begin; @@ -324,8 +324,9 @@ fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { size_t begin = 0; size_t longest_len = 0; size_t len = 0; - while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, - &len, val, /* forward */ true)) { + while (begin < nbits + && fb_iter_range_impl( + fb, nbits, begin, &begin, &len, val, /* forward */ true)) { if (len > longest_len) { longest_len = len; } diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index e42425f9..8ca4f3c6 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -89,7 +89,7 @@ fxp_round_down(fxp_t a) { static inline uint32_t fxp_round_nearest(fxp_t a) { - uint32_t fractional_part = (a & ((1U << 16) - 1)); + uint32_t fractional_part = (a & ((1U << 16) - 1)); uint32_t increment = (uint32_t)(fractional_part >= (1U << 15)); return (a >> 16) + increment; } diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 15162b94..73e2214e 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -25,7 +25,7 @@ hash_rotl_64(uint64_t x, int8_t r) { static inline uint32_t hash_get_block_32(const uint32_t *p, int i) { /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + if (unlikely((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0) { uint32_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); @@ -38,7 +38,7 @@ hash_get_block_32(const uint32_t *p, int i) { static inline uint64_t hash_get_block_64(const uint64_t *p, int i) { /* Handle unaligned read. 
*/ - if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + if (unlikely((uintptr_t)p & (sizeof(uint64_t) - 1)) != 0) { uint64_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); @@ -72,8 +72,8 @@ hash_fmix_64(uint64_t k) { static inline uint32_t hash_x86_32(const void *key, int len, uint32_t seed) { - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 4; + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; uint32_t h1 = seed; @@ -82,8 +82,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { /* body */ { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); - int i; + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + int i; for (i = -nblocks; i; i++) { uint32_t k1 = hash_get_block_32(blocks, i); @@ -94,21 +94,29 @@ hash_x86_32(const void *key, int len, uint32_t seed) { h1 ^= k1; h1 = hash_rotl_32(h1, 13); - h1 = h1*5 + 0xe6546b64; + h1 = h1 * 5 + 0xe6546b64; } } /* tail */ { - const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); - k1 *= c2; h1 ^= k1; + case 3: + k1 ^= tail[2] << 16; + JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; } } @@ -121,10 +129,9 @@ hash_x86_32(const void *key, int len, uint32_t seed) { } static inline void -hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) { - const uint8_t * data = (const uint8_t *) key; - const int nblocks = len / 16; +hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; uint32_t h1 = seed; uint32_t h2 = 
seed; @@ -138,95 +145,161 @@ hash_x86_128(const void *key, const int len, uint32_t seed, /* body */ { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); - int i; + const uint32_t *blocks = (const uint32_t *)(data + + nblocks * 16); + int i; for (i = -nblocks; i; i++) { - uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); - uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); - uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); - uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + uint32_t k1 = hash_get_block_32(blocks, i * 4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i * 4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i * 4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i * 4 + 3); - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; - h1 = hash_rotl_32(h1, 19); h1 += h2; - h1 = h1*5 + 0x561ccd1b; + h1 = hash_rotl_32(h1, 19); + h1 += h2; + h1 = h1 * 5 + 0x561ccd1b; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + k2 *= c2; + k2 = hash_rotl_32(k2, 16); + k2 *= c3; + h2 ^= k2; - h2 = hash_rotl_32(h2, 17); h2 += h3; - h2 = h2*5 + 0x0bcaa747; + h2 = hash_rotl_32(h2, 17); + h2 += h3; + h2 = h2 * 5 + 0x0bcaa747; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + k3 *= c3; + k3 = hash_rotl_32(k3, 17); + k3 *= c4; + h3 ^= k3; - h3 = hash_rotl_32(h3, 15); h3 += h4; - h3 = h3*5 + 0x96cd1c35; + h3 = hash_rotl_32(h3, 15); + h3 += h4; + h3 = h3 * 5 + 0x96cd1c35; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + k4 *= c4; + k4 = hash_rotl_32(k4, 18); + k4 *= c1; + h4 ^= k4; - h4 = hash_rotl_32(h4, 13); h4 += h1; - h4 = h4*5 + 0x32ac3b17; + h4 = hash_rotl_32(h4, 13); + h4 += h1; + h4 = h4 * 5 + 0x32ac3b17; } } /* tail */ { - const uint8_t *tail = (const uint8_t *) (data + nblocks*16); - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; + const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); + uint32_t k1 = 0; + uint32_t k2 = 0; 
+ uint32_t k3 = 0; + uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; - case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + case 15: + k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; - case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH; - case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; - case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + case 14: + k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; - case 8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH; - case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; - case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + case 13: + k4 ^= tail[12] << 0; + k4 *= c4; + k4 = hash_rotl_32(k4, 18); + k4 *= c1; + h4 ^= k4; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH; - case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + case 12: + k3 ^= (uint32_t)tail[11] << 24; + JEMALLOC_FALLTHROUGH; + case 11: + k3 ^= tail[10] << 16; + JEMALLOC_FALLTHROUGH; + case 10: + k3 ^= tail[9] << 8; + JEMALLOC_FALLTHROUGH; + case 9: + k3 ^= tail[8] << 0; + k3 *= c3; + k3 = hash_rotl_32(k3, 17); + k3 *= c4; + h3 ^= k3; + JEMALLOC_FALLTHROUGH; + case 8: + k2 ^= (uint32_t)tail[7] << 24; + JEMALLOC_FALLTHROUGH; + case 7: + k2 ^= tail[6] << 16; + JEMALLOC_FALLTHROUGH; + case 6: + k2 ^= tail[5] << 8; + JEMALLOC_FALLTHROUGH; + case 5: + k2 ^= tail[4] << 0; + k2 *= c2; + k2 = hash_rotl_32(k2, 16); + k2 *= c3; + h2 ^= k2; + JEMALLOC_FALLTHROUGH; + case 4: + k1 ^= (uint32_t)tail[3] << 24; + JEMALLOC_FALLTHROUGH; + case 3: + k1 ^= tail[2] << 16; + 
JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= tail[0] << 0; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; break; } } /* finalization */ - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + h1 ^= len; + h2 ^= len; + h3 ^= len; + h4 ^= len; - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; h1 = hash_fmix_32(h1); h2 = hash_fmix_32(h2); h3 = hash_fmix_32(h3); h4 = hash_fmix_32(h4); - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; - r_out[0] = (((uint64_t) h2) << 32) | h1; - r_out[1] = (((uint64_t) h4) << 32) | h3; + r_out[0] = (((uint64_t)h2) << 32) | h1; + r_out[1] = (((uint64_t)h4) << 32) | h3; } static inline void -hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) { - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 16; +hash_x64_128( + const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; uint64_t h1 = seed; uint64_t h2 = seed; @@ -236,56 +309,99 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, /* body */ { - const uint64_t *blocks = (const uint64_t *) (data); - int i; + const uint64_t *blocks = (const uint64_t *)(data); + int i; for (i = 0; i < nblocks; i++) { - uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); - uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + uint64_t k1 = hash_get_block_64(blocks, i * 2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i * 2 + 1); - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + k1 *= c1; + k1 = hash_rotl_64(k1, 31); + k1 *= c2; + h1 ^= k1; - h1 = hash_rotl_64(h1, 27); h1 += h2; - h1 = h1*5 + 0x52dce729; + h1 = hash_rotl_64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; - k2 *= c2; k2 = hash_rotl_64(k2, 
33); k2 *= c1; h2 ^= k2; + k2 *= c2; + k2 = hash_rotl_64(k2, 33); + k2 *= c1; + h2 ^= k2; - h2 = hash_rotl_64(h2, 31); h2 += h1; - h2 = h2*5 + 0x38495ab5; + h2 = hash_rotl_64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; } } /* tail */ { - const uint8_t *tail = (const uint8_t*)(data + nblocks*16); - uint64_t k1 = 0; - uint64_t k2 = 0; + const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); + uint64_t k1 = 0; + uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH; - case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + case 15: + k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + case 14: + k2 ^= ((uint64_t)(tail[13])) << 40; + JEMALLOC_FALLTHROUGH; + case 13: + k2 ^= ((uint64_t)(tail[12])) << 32; + JEMALLOC_FALLTHROUGH; + case 12: + k2 ^= ((uint64_t)(tail[11])) << 24; + JEMALLOC_FALLTHROUGH; + case 11: + k2 ^= ((uint64_t)(tail[10])) << 16; + JEMALLOC_FALLTHROUGH; + case 10: + k2 ^= ((uint64_t)(tail[9])) << 8; + JEMALLOC_FALLTHROUGH; + case 
9: + k2 ^= ((uint64_t)(tail[8])) << 0; + k2 *= c2; + k2 = hash_rotl_64(k2, 33); + k2 *= c1; + h2 ^= k2; + JEMALLOC_FALLTHROUGH; + case 8: + k1 ^= ((uint64_t)(tail[7])) << 56; + JEMALLOC_FALLTHROUGH; + case 7: + k1 ^= ((uint64_t)(tail[6])) << 48; + JEMALLOC_FALLTHROUGH; + case 6: + k1 ^= ((uint64_t)(tail[5])) << 40; + JEMALLOC_FALLTHROUGH; + case 5: + k1 ^= ((uint64_t)(tail[4])) << 32; + JEMALLOC_FALLTHROUGH; + case 4: + k1 ^= ((uint64_t)(tail[3])) << 24; + JEMALLOC_FALLTHROUGH; + case 3: + k1 ^= ((uint64_t)(tail[2])) << 16; + JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= ((uint64_t)(tail[1])) << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= ((uint64_t)(tail[0])) << 0; + k1 *= c1; + k1 = hash_rotl_64(k1, 31); + k1 *= c2; + h1 ^= k1; break; } } /* finalization */ - h1 ^= len; h2 ^= len; + h1 ^= len; + h2 ^= len; h1 += h2; h2 += h1; diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 76b9130d..bbbcb320 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -83,7 +83,6 @@ enum hook_dalloc_e { }; typedef enum hook_dalloc_e hook_dalloc_t; - enum hook_expand_e { hook_expand_realloc, hook_expand_rallocx, @@ -91,23 +90,22 @@ enum hook_expand_e { }; typedef enum hook_expand_e hook_expand_t; -typedef void (*hook_alloc)( - void *extra, hook_alloc_t type, void *result, uintptr_t result_raw, - uintptr_t args_raw[3]); +typedef void (*hook_alloc)(void *extra, hook_alloc_t type, void *result, + uintptr_t result_raw, uintptr_t args_raw[3]); typedef void (*hook_dalloc)( void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]); -typedef void (*hook_expand)( - void *extra, hook_expand_t type, void *address, size_t old_usize, - size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); +typedef void (*hook_expand)(void *extra, hook_expand_t type, void *address, + size_t old_usize, size_t new_usize, uintptr_t result_raw, + uintptr_t args_raw[4]); typedef struct hooks_s hooks_t; struct hooks_s { - hook_alloc 
alloc_hook; + hook_alloc alloc_hook; hook_dalloc dalloc_hook; hook_expand expand_hook; - void *extra; + void *extra; }; /* @@ -156,8 +154,8 @@ void hook_remove(tsdn_t *tsdn, void *opaque); void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, uintptr_t args_raw[3]); -void hook_invoke_dalloc(hook_dalloc_t type, void *address, - uintptr_t args_raw[3]); +void hook_invoke_dalloc( + hook_dalloc_t type, void *address, uintptr_t args_raw[3]); void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 2e9fccc2..7a6ba0b9 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -27,7 +27,7 @@ struct hpa_central_s { * * Guarded by grow_mtx. */ - void *eden; + void *eden; size_t eden_len; /* Source for metadata. */ base_t *base; @@ -78,7 +78,7 @@ struct hpa_shard_nonderived_stats_s { /* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { - psset_stats_t psset_stats; + psset_stats_t psset_stats; hpa_shard_nonderived_stats_t nonderived_stats; }; @@ -156,14 +156,15 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); +bool hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); -void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_shard_stats_t *dst); +void hpa_shard_stats_merge( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to @@ -173,8 +174,8 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); -void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, - bool deferral_allowed); +void hpa_shard_set_deferral_allowed( + tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed); void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index d0618f89..f50ff58f 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -13,7 +13,7 @@ struct hpa_hooks_s { void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); - bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes); + bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); }; extern const hpa_hooks_t hpa_hooks_default; diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h index 283510b9..53bcb670 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -8,26 +8,27 @@ typedef struct iovec 
hpa_io_vector_t; #else typedef struct { - void *iov_base; - size_t iov_len; + void *iov_base; + size_t iov_len; } hpa_io_vector_t; #endif /* Actually invoke hooks. If we fail vectorized, use single purges */ static void hpa_try_vectorized_purge( - hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { - bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); - if (!success) { - /* On failure, it is safe to purge again (potential perf + hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf * penalty) If kernel can tell exactly which regions * failed, we could avoid that penalty. */ - for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge(vec[i].iov_base, vec[i].iov_len); - } - } + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge( + vec[i].iov_base, vec[i].iov_len); + } + } } /* @@ -35,48 +36,48 @@ hpa_try_vectorized_purge( * It invokes the hook when batch limit is reached */ typedef struct { - hpa_io_vector_t *vp; - size_t cur; - size_t total_bytes; - size_t capacity; + hpa_io_vector_t *vp; + size_t cur; + size_t total_bytes; + size_t capacity; } hpa_range_accum_t; static inline void hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { - ra->vp = v; - ra->capacity = sz; - ra->total_bytes = 0; - ra->cur = 0; + ra->vp = v; + ra->capacity = sz; + ra->total_bytes = 0; + ra->cur = 0; } static inline void hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { - assert(ra->total_bytes > 0 && ra->cur > 0); - hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); - ra->cur = 0; - ra->total_bytes = 0; + assert(ra->total_bytes > 0 && ra->cur > 0); + hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + 
ra->cur = 0; + ra->total_bytes = 0; } static inline void hpa_range_accum_add( - hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { - assert(ra->cur < ra->capacity); + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + assert(ra->cur < ra->capacity); - ra->vp[ra->cur].iov_base = addr; - ra->vp[ra->cur].iov_len = sz; - ra->total_bytes += sz; - ra->cur++; + ra->vp[ra->cur].iov_base = addr; + ra->vp[ra->cur].iov_len = sz; + ra->total_bytes += sz; + ra->cur++; - if (ra->cur == ra->capacity) { - hpa_range_accum_flush(ra, shard); - } + if (ra->cur == ra->capacity) { + hpa_range_accum_flush(ra, shard); + } } static inline void hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { - if (ra->cur > 0) { - hpa_range_accum_flush(ra, shard); - } + if (ra->cur > 0) { + hpa_range_accum_flush(ra, shard); + } } /* @@ -84,14 +85,14 @@ hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { */ typedef struct { hpdata_purge_state_t state; - hpdata_t *hp; - bool dehugify; + hpdata_t *hp; + bool dehugify; } hpa_purge_item_t; typedef struct hpa_purge_batch_s hpa_purge_batch_t; struct hpa_purge_batch_s { hpa_purge_item_t *items; - size_t items_capacity; + size_t items_capacity; /* Number of huge pages to purge in current batch */ size_t item_cnt; /* Number of ranges to purge in current batch */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index a8a4a552..75550f9b 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -73,7 +73,7 @@ struct hpdata_s { bool h_hugify_allowed; /* When we became a hugification candidate. */ nstime_t h_time_hugify_allowed; - bool h_in_psset_hugify_container; + bool h_in_psset_hugify_container; /* Whether or not a purge or hugify is currently happening. 
*/ bool h_mid_purge; @@ -186,8 +186,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -250,7 +250,6 @@ hpdata_changing_state_get(const hpdata_t *hpdata) { return hpdata->h_mid_purge || hpdata->h_mid_hugify; } - static inline bool hpdata_updating_get(const hpdata_t *hpdata) { return hpdata->h_updating; @@ -317,7 +316,7 @@ hpdata_assert_empty(hpdata_t *hpdata) { */ static inline bool hpdata_consistent(hpdata_t *hpdata) { - if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + if (fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) != hpdata_longest_free_range_get(hpdata)) { return false; } @@ -368,7 +367,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * offset within that allocation. */ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); -void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); /* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge @@ -377,10 +376,10 @@ void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); */ typedef struct hpdata_purge_state_s hpdata_purge_state_t; struct hpdata_purge_state_s { - size_t npurged; - size_t ndirty_to_purge; + size_t npurged; + size_t ndirty_to_purge; fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; - size_t next_purge_search_begin; + size_t next_purge_search_begin; }; /* @@ -398,8 +397,8 @@ struct hpdata_purge_state_s { * Returns the number of dirty pages that will be purged and sets nranges * to number of ranges with dirty pages that will be purged. 
*/ -size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, - size_t *nranges); +size_t hpdata_purge_begin( + hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges); /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h index 0da920ca..e8ed44d3 100644 --- a/include/jemalloc/internal/inspect.h +++ b/include/jemalloc/internal/inspect.h @@ -26,7 +26,7 @@ typedef struct inspect_extent_util_stats_verbose_s inspect_extent_util_stats_verbose_t; struct inspect_extent_util_stats_verbose_s { - void *slabcur_addr; + void *slabcur_addr; size_t nfree; size_t nregs; size_t size; @@ -34,10 +34,10 @@ struct inspect_extent_util_stats_verbose_s { size_t bin_nregs; }; -void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size); +void inspect_extent_util_stats_get( + tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size); void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree, + size_t *bin_nregs, void **slabcur_addr); #endif /* JEMALLOC_INTERNAL_INSPECT_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0bca9133..2ca12c4a 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -3,64 +3,65 @@ #include #ifdef _WIN32 -# include -# include "msvc_compat/windows_extra.h" -# include "msvc_compat/strings.h" -# ifdef _WIN64 -# if LG_VADDR <= 32 -# error Generate the headers using x64 vcargs -# endif -# else -# if LG_VADDR > 32 -# undef LG_VADDR -# define LG_VADDR 32 -# endif -# endif +# include +# include "msvc_compat/windows_extra.h" +# 
include "msvc_compat/strings.h" +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else -# include -# include -# if !defined(__pnacl__) && !defined(__native_client__) -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# if defined(SYS_open) && defined(__aarch64__) - /* Android headers may define SYS_open to __NR_open even though +# include +# include +# if !defined(__pnacl__) && !defined(__native_client__) +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# if defined(SYS_open) && defined(__aarch64__) +/* Android headers may define SYS_open to __NR_open even though * __NR_open may not exist on AArch64 (superseded by __NR_openat). */ -# undef SYS_open -# endif -# include -# endif -# include -# if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) -# include -# include -# if defined(__FreeBSD__) -# define cpu_set_t cpuset_t -# endif -# endif -# include -# ifdef JEMALLOC_OS_UNFAIR_LOCK -# include -# endif -# ifdef JEMALLOC_GLIBC_MALLOC_HOOK -# include -# endif -# include -# include -# include -# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME -# include -# endif +# undef SYS_open +# endif +# include +# endif +# include +# if defined(__FreeBSD__) || defined(__DragonFly__) \ + || defined(__OpenBSD__) +# include +# include +# if defined(__FreeBSD__) +# define cpu_set_t cpuset_t +# endif +# endif +# include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif +# include +# include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include #include #ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX +# define SIZE_T_MAX SIZE_MAX #endif #ifndef SSIZE_MAX -# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) +# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) 
#endif #include #include @@ -69,30 +70,30 @@ #include #include #ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +# define offsetof(type, member) ((size_t) & (((type *)NULL)->member)) #endif #include #include #include #ifdef _MSC_VER -# include +# include typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -# define __func__ __FUNCTION__ -# ifdef JEMALLOC_HAS_RESTRICT -# define restrict __restrict -# endif +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +# ifdef JEMALLOC_HAS_RESTRICT +# define restrict __restrict +# endif /* Disable warnings about deprecated system functions. */ -# pragma warning(disable: 4996) -#if _MSC_VER < 1800 +# pragma warning(disable : 4996) +# if _MSC_VER < 1800 static int isblank(int c) { return (c == '\t' || c == ' '); } -#endif +# endif #else -# include +# include #endif #include @@ -102,7 +103,7 @@ isblank(int c) { * classes. */ #ifdef small -# undef small +# undef small #endif /* diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3b42f833..b502c7e7 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -12,34 +12,34 @@ extern bool malloc_slow; /* Run-time options. 
*/ -extern bool opt_abort; -extern bool opt_abort_conf; -extern bool opt_trust_madvise; -extern bool opt_confirm_conf; -extern bool opt_hpa; +extern bool opt_abort; +extern bool opt_abort_conf; +extern bool opt_trust_madvise; +extern bool opt_confirm_conf; +extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; -extern sec_opts_t opt_hpa_sec_opts; +extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; +extern bool opt_junk_alloc; +extern bool opt_junk_free; extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE invalid_conf_abort)(void); -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_experimental_infallible_new; -extern bool opt_experimental_tcache_gc; -extern bool opt_zero; -extern unsigned opt_narenas; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_experimental_infallible_new; +extern bool opt_experimental_tcache_gc; +extern bool opt_zero; +extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; -extern malloc_init_t malloc_init_state; -extern const char *const zero_realloc_mode_names[]; -extern atomic_zu_t zero_realloc_count; -extern bool opt_cache_oblivious; -extern unsigned opt_debug_double_free_max_scan; -extern size_t opt_calloc_madvise_threshold; -extern bool opt_disable_large_size_classes; +extern malloc_init_t malloc_init_state; +extern const char *const zero_realloc_mode_names[]; +extern atomic_zu_t zero_realloc_count; +extern bool opt_cache_oblivious; +extern unsigned opt_debug_double_free_max_scan; +extern size_t opt_calloc_madvise_threshold; +extern bool opt_disable_large_size_classes; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; @@ -64,24 +64,24 @@ extern atomic_p_t arenas[]; extern unsigned huge_arena_ind; -void *a0malloc(size_t size); -void a0dalloc(void 
*ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -void arena_set(unsigned ind, arena_t *arena); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); -void iarena_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); -void sdallocx_default(void *ptr, size_t size, int flags); -void free_default(void *ptr); -void *malloc_default(size_t size); +void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +void sdallocx_default(void *ptr, size_t size, int flags); +void free_default(void *ptr); +void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 111cda42..8513effd 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -20,12 +20,12 @@ malloc_getcpu(void) { return (malloc_cpuid_t)sched_getcpu(); #elif defined(JEMALLOC_HAVE_RDTSCP) unsigned int ecx; - asm volatile("rdtscp" : "=c" (ecx) :: "eax", "edx"); + asm volatile("rdtscp" : 
"=c"(ecx)::"eax", "edx"); return (malloc_cpuid_t)(ecx & 0xfff); #elif defined(__aarch64__) && defined(__APPLE__) /* Other oses most likely use tpidr_el0 instead */ uintptr_t c; - asm volatile("mrs %x0, tpidrro_el0" : "=r"(c) :: "memory"); + asm volatile("mrs %x0, tpidrro_el0" : "=r"(c)::"memory"); return (malloc_cpuid_t)(c & (1 << 3) - 1); #else not_reached(); @@ -42,8 +42,8 @@ percpu_arena_choose(void) { assert(cpuid >= 0); unsigned arena_ind; - if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / - 2)) { + if ((opt_percpu_arena == percpu_arena) + || ((unsigned)cpuid < ncpus / 2)) { arena_ind = cpuid; } else { assert(opt_percpu_arena == per_phycpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2ddb4a89..dad37a9c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,13 +24,12 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); assert(tcache_slow->arena != NULL); - tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, - tcache, newarena); + tcache_arena_reassociate( + tsd_tsdn(tsd), tcache_slow, tcache, newarena); } } } - /* Choose an arena based on a per-thread value. 
*/ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { @@ -51,18 +50,18 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { assert(ret); if (tcache_available(tsd)) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); if (tcache_slow->arena != NULL) { /* See comments in tsd_tcache_data_init().*/ - assert(tcache_slow->arena == - arena_get(tsd_tsdn(tsd), 0, false)); + assert(tcache_slow->arena + == arena_get(tsd_tsdn(tsd), 0, false)); if (tcache_slow->arena != ret) { tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, ret); } } else { - tcache_arena_associate(tsd_tsdn(tsd), - tcache_slow, tcache, ret); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, ret); } } } @@ -72,10 +71,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { * auto percpu arena range, (i.e. thread is assigned to a manually * managed arena), then percpu arena is skipped. 
*/ - if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && - !internal && (arena_ind_get(ret) < - percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != - tsd_tsdn(tsd))) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) + && !internal + && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) + && (ret->last_thd != tsd_tsdn(tsd))) { unsigned ind = percpu_arena_choose(); if (arena_ind_get(ret) != ind) { percpu_arena_update(tsd, ind); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 39c196a5..2c61f8c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -63,11 +63,12 @@ iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } - ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path); + ret = arena_malloc( + tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } @@ -78,8 +79,8 @@ JEMALLOC_ALWAYS_INLINE void * iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path) { bool slab = sz_can_use_slab(size); - return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache, - is_internal, arena, slow_path); + return iallocztm_explicit_slab( + tsdn, size, ind, zero, slab, tcache, is_internal, arena, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -89,8 +90,8 @@ ialloc(tsd_t *tsd, size_t size, 
szind_t ind, bool zero, bool slow_path) { } JEMALLOC_ALWAYS_INLINE void * -ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { +ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */ @@ -98,8 +99,8 @@ ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -123,10 +124,10 @@ ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, } JEMALLOC_ALWAYS_INLINE void * -ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache, arena_t *arena) { - return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, false, arena); +ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + bool slab, tcache_t *tcache, arena_t *arena) { + return ipallocztm_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, false, arena); } JEMALLOC_ALWAYS_INLINE void * @@ -146,13 +147,13 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (config_stats && is_internal) 
{ arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - if (!is_internal && !tsdn_null(tsdn) && - tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + if (!is_internal && !tsdn_null(tsdn) + && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); @@ -166,8 +167,8 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_t *alloc_ctx, bool slow_path) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } @@ -175,17 +176,17 @@ JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - void *p; + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + void *p; size_t usize, copysize; usize = sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, arena); + p = ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); if (p == NULL) { return NULL; } @@ -195,11 +196,12 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? 
hook_alloc_realloc : hook_alloc_rallocx, p, + (uintptr_t)p, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return p; } @@ -214,15 +216,14 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, JEMALLOC_ALWAYS_INLINE void * iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, - hook_ralloc_args_t *hook_args) -{ + hook_ralloc_args_t *hook_args) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { + if (alignment != 0 + && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) { /* * Existing object alignment is inadequate; allocate new space * and copy. 
@@ -238,8 +239,7 @@ iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, - hook_ralloc_args_t *hook_args) -{ + hook_ralloc_args_t *hook_args) { bool slab = sz_can_use_slab(usize); return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero, slab, tcache, arena, hook_args); @@ -257,23 +257,23 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, size_t *newsize) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { + if (alignment != 0 + && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) { /* Existing object alignment is inadequate. */ *newsize = oldsize; return true; } - return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero, - newsize); + return arena_ralloc_no_move( + tsdn, ptr, oldsize, size, extra, zero, newsize); } JEMALLOC_ALWAYS_INLINE void -fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, - cache_bin_t *bin, void *ret) { +fastpath_success_finish( + tsd_t *tsd, uint64_t allocated_after, cache_bin_t *bin, void *ret) { thread_allocated_set(tsd, allocated_after); if (config_stats) { bin->tstats.nrequests++; @@ -331,8 +331,8 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { sz_size2index_usize_fastpath(size, &ind, &usize); /* Fast path relies on size being a bin. 
*/ assert(ind < SC_NBINS); - assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && - (size <= SC_SMALL_MAXCLASS)); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) + && (size <= SC_SMALL_MAXCLASS)); uint64_t allocated, threshold; te_malloc_fastpath_ctx(tsd, &allocated, &threshold); @@ -363,7 +363,7 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { cache_bin_t *bin = &tcache->bins[ind]; /* Suppress spurious warning from static analysis */ assert(bin != NULL); - bool tcache_success; + bool tcache_success; void *ret; /* @@ -388,56 +388,56 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { JEMALLOC_ALWAYS_INLINE tcache_t * tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { - tcache_t *tcache; - if (tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else if (is_alloc || - likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } else { - /* + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. */ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc + || likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* * Should not specify tcache on deallocation path when being * reentrant. 
*/ - assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if (tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, tcache_ind); - } - } - return tcache; + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 + || tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; } JEMALLOC_ALWAYS_INLINE bool maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { - if (config_opt_size_checks) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &dbg_ctx); - if (alloc_ctx->szind != dbg_ctx.szind) { - safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr, - /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), - /* input_size */ emap_alloc_ctx_usize_get( - alloc_ctx)); - return true; - } - if (alloc_ctx->slab != dbg_ctx.slab) { - safety_check_fail( - "Internal heap corruption detected: " - "mismatch in slab bit"); - return true; - } - } - return false; + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ + emap_alloc_ctx_usize_get(alloc_ctx)); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; } JEMALLOC_ALWAYS_INLINE bool @@ -447,7 +447,7 @@ prof_sample_aligned(const void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { - /* + /* * free_fastpath do not handle two uncommon cases: 1) sampled profiled * objects and 2) sampled junk & stash for use-after-free detection. 
* Both have special alignments which are used to escape the fastpath. @@ -456,144 +456,145 @@ free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { * are enabled (the assertion below). Avoiding redundant checks since * this is on the fastpath -- at most one runtime branch from this. */ - if (config_debug && cache_bin_nonfast_aligned(ptr)) { - assert(prof_sample_aligned(ptr)); - } + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } - if (config_prof && check_prof) { - /* When prof is enabled, the prof_sample alignment is enough. */ - if (prof_sample_aligned(ptr)) { - return true; - } else { - return false; - } - } + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. */ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } - if (config_uaf_detection) { - if (cache_bin_nonfast_aligned(ptr)) { - return true; - } else { - return false; - } - } + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } - return false; + return false; } /* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE -bool free_fastpath(void *ptr, size_t size, bool size_hint) { - tsd_t *tsd = tsd_get(false); - /* The branch gets optimized away unless tsd_get_allocates(). */ - if (unlikely(tsd == NULL)) { - return false; - } - /* +bool +free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* * The tsd_fast() / initialized checks are folded into the branch * testing (deallocated_after >= threshold) later in this function. * The threshold will be set to 0 when !tsd_fast. 
*/ - assert(tsd_fast(tsd) || - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); + assert(tsd_fast(tsd) + || *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); - size_t usize; - if (!size_hint) { - bool err = emap_alloc_ctx_try_lookup_fast(tsd, - &arena_emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); + size_t usize; + if (!size_hint) { + bool err = emap_alloc_ctx_try_lookup_fast( + tsd, &arena_emap_global, ptr, &alloc_ctx); - /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ false))) { - return false; - } - assert(alloc_ctx.szind != SC_NSIZES); + /* Note: profiled objects will have alloc_ctx.slab set */ + if (unlikely(err || !alloc_ctx.slab + || free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); usize = sz_index2size(alloc_ctx.szind); - } else { - /* + } else { + /* * Check for both sizes that are too large, and for sampled / * special aligned objects. The alignment check will also check * for null ptr. */ - if (unlikely(size > SC_LOOKUP_MAXCLASS || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ true))) { - return false; - } + if (unlikely(size > SC_LOOKUP_MAXCLASS + || free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { + return false; + } sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize); - /* Max lookup class must be small. */ - assert(alloc_ctx.szind < SC_NBINS); - /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = true; - } - /* + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = true; + } + /* * Currently the fastpath only handles small sizes. 
The branch on * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking * tcache szind upper limit (i.e. tcache_max) as well. */ - assert(alloc_ctx.slab); + assert(alloc_ctx.slab); - uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold); + uint64_t deallocated, threshold; + te_free_fastpath_ctx(tsd, &deallocated, &threshold); - uint64_t deallocated_after = deallocated + usize; - /* + uint64_t deallocated_after = deallocated + usize; + /* * Check for events and tsd non-nominal (fast_threshold will be set to * 0) in a single branch. Note that this handles the uninitialized case * as well (TSD init will be triggered on the non-fastpath). Therefore * anything depends on a functional TSD (e.g. the alloc_ctx sanity check * below) needs to be after this branch. */ - if (unlikely(deallocated_after >= threshold)) { - return false; - } - assert(tsd_fast(tsd)); - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } + if (unlikely(deallocated_after >= threshold)) { + return false; + } + assert(tsd_fast(tsd)); + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ false); - cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; - /* + /* * If junking were enabled, this is where we would do it. It's not * though, since we ensured above that we're on the fast path. Assert * that to double-check. 
*/ - assert(!opt_junk_free); + assert(!opt_junk_free); - if (!cache_bin_dalloc_easy(bin, ptr)) { - return false; - } + if (!cache_bin_dalloc_easy(bin, ptr)) { + return false; + } - *tsd_thread_deallocatedp_get(tsd) = deallocated_after; + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; - return true; + return true; } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_noflags(void *ptr, size_t size) { - if (!free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, 0); - } + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_impl(void *ptr, size_t size, int flags) { - if (flags != 0 || !free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, flags); - } + if (flags != 0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_free_impl(void *ptr) { - if (!free_fastpath(ptr, 0, false)) { - free_default(ptr); - } + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } } #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 407e868a..eb1ca119 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -2,45 +2,46 @@ #define JEMALLOC_INTERNAL_MACROS_H #ifdef JEMALLOC_DEBUG -# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE static inline #else -# ifdef _MSC_VER -# define JEMALLOC_ALWAYS_INLINE static __forceinline -# else -# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline -# endif +# ifdef _MSC_VER +# define JEMALLOC_ALWAYS_INLINE static __forceinline +# else +# define JEMALLOC_ALWAYS_INLINE \ + JEMALLOC_ATTR(always_inline) static inline +# endif #endif #ifdef _MSC_VER -# define inline _inline +# define inline _inline #endif #define UNUSED 
JEMALLOC_ATTR(unused) -#define ZU(z) ((size_t)z) -#define ZD(z) ((ssize_t)z) -#define QU(q) ((uint64_t)q) -#define QD(q) ((int64_t)q) +#define ZU(z) ((size_t)z) +#define ZD(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QD(q) ((int64_t)q) -#define KZU(z) ZU(z##ULL) -#define KZD(z) ZD(z##LL) -#define KQU(q) QU(q##ULL) -#define KQD(q) QI(q##LL) +#define KZU(z) ZU(z##ULL) +#define KZD(z) ZD(z##LL) +#define KQU(q) QU(q##ULL) +#define KQD(q) QI(q##LL) #ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif #if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) -# define restrict +# define restrict #endif /* Various function pointers are static and immutable except during testing. */ #ifdef JEMALLOC_JET -# define JET_MUTABLE -# define JET_EXTERN extern +# define JET_MUTABLE +# define JET_EXTERN extern #else -# define JET_MUTABLE const -# define JET_EXTERN static +# define JET_MUTABLE const +# define JET_EXTERN static #endif #define JEMALLOC_VA_ARGS_HEAD(head, ...) 
head @@ -48,91 +49,93 @@ /* Diagnostic suppression macros */ #if defined(_MSC_VER) && !defined(__clang__) -# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable : W)) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* #pragma GCC diagnostic first appeared in gcc 4.6. */ -#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ - (__GNUC_MINOR__ > 5)))) || defined(__clang__) +#elif (defined(__GNUC__) \ + && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) \ + || defined(__clang__) /* * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang * diagnostic suppression macros and should not be used anywhere else. 
*/ -# define JEMALLOC_PRAGMA__(X) _Pragma(#X) -# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) -# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ - JEMALLOC_PRAGMA__(GCC diagnostic ignored W) +# define JEMALLOC_PRAGMA__(X) _Pragma(#X) +# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) +# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ + JEMALLOC_PRAGMA__(GCC diagnostic ignored W) /* * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and * all clang versions up to version 7 (currently trunk, unreleased). This macro * suppresses the warning for the affected compiler versions only. */ -# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \ - defined(__clang__) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# endif +# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) \ + || defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ + JEMALLOC_DIAGNOSTIC_IGNORE( \ + "-Wmissing-field-initializers") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# endif -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") -# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") -# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# endif -# 
ifdef JEMALLOC_HAVE_ATTR_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# endif -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ - JEMALLOC_DIAGNOSTIC_PUSH \ - JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") +# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") +# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# endif +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER #else -# define JEMALLOC_DIAGNOSTIC_PUSH -# define JEMALLOC_DIAGNOSTIC_POP -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +# define JEMALLOC_DIAGNOSTIC_PUSH +# define JEMALLOC_DIAGNOSTIC_POP +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define 
JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif #ifdef __clang_analyzer__ -# define JEMALLOC_CLANG_ANALYZER +# define JEMALLOC_CLANG_ANALYZER #endif #ifdef JEMALLOC_CLANG_ANALYZER -# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) -# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v #else -# define JEMALLOC_CLANG_ANALYZER_SUPPRESS -# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) #endif -#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ - JEMALLOC_DIAGNOSTIC_PUSH \ - JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ - __VA_ARGS__ \ - JEMALLOC_DIAGNOSTIC_POP +#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + __VA_ARGS__ \ + JEMALLOC_DIAGNOSTIC_POP /* * Disables spurious diagnostics for all headers. 
Since these headers are not diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h index 5fbbe249..bf74a612 100644 --- a/include/jemalloc/internal/jemalloc_internal_overrides.h +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -9,13 +9,14 @@ */ #ifdef JEMALLOC_OVERRIDE_LG_PAGE - #undef LG_PAGE - #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE +# undef LG_PAGE +# define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE #endif #ifdef JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF - #undef JEMALLOC_CONFIG_MALLOC_CONF - #define JEMALLOC_CONFIG_MALLOC_CONF JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF +# undef JEMALLOC_CONFIG_MALLOC_CONF +# define JEMALLOC_CONFIG_MALLOC_CONF \ + JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF #endif #endif /* JEMALLOC_INTERNAL_OVERRIDES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index cddbfb65..0ade5461 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -18,13 +18,13 @@ enum zero_realloc_action_e { typedef enum zero_realloc_action_e zero_realloc_action_t; /* Signature of write callback. */ -typedef void (write_cb_t)(void *, const char *); +typedef void(write_cb_t)(void *, const char *); enum malloc_init_e { - malloc_init_uninitialized = 3, - malloc_init_a0_initialized = 2, - malloc_init_recursible = 1, - malloc_init_initialized = 0 /* Common case --> jnz. */ + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. 
*/ }; typedef enum malloc_init_e malloc_init_t; @@ -39,48 +39,46 @@ typedef enum malloc_init_e malloc_init_t; * * aaaaaaaa aaaatttt tttttttt 0znnnnnn */ -#define MALLOCX_ARENA_BITS 12 -#define MALLOCX_TCACHE_BITS 12 -#define MALLOCX_LG_ALIGN_BITS 6 -#define MALLOCX_ARENA_SHIFT 20 -#define MALLOCX_TCACHE_SHIFT 8 -#define MALLOCX_ARENA_MASK \ - ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) /* NB: Arena index bias decreases the maximum number of arenas by 1. */ -#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) -#define MALLOCX_TCACHE_MASK \ - ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) -#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) -#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) +#define MALLOCX_TCACHE_MASK \ + ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) +#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. 
*/ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ - (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ - (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ - ((bool)(flags & MALLOCX_ZERO)) +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX - 1)) +#define MALLOCX_ZERO_GET(flags) ((bool)(flags & MALLOCX_ZERO)) -#define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) -#define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) \ + - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) /* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) +#define TINY_MIN (1U << LG_TINY_MIN) -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) +#define LONG_CEILING(a) (((a) + LONG_MASK) & ~LONG_MASK) -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) +#define PTR_CEILING(a) (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. @@ -89,25 +87,24 @@ typedef enum malloc_init_e malloc_init_t; * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can * only handle raw constants. 
*/ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) +#define CACHELINE_CEILING(s) (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)(((byte_t *)(a)) - (((uintptr_t)(a)) - \ - ((uintptr_t)(a) & ((~(alignment)) + 1))))) +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)(((byte_t *)(a)) \ + - (((uintptr_t)(a)) - ((uintptr_t)(a) & ((~(alignment)) + 1))))) /* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ ((size_t)((uintptr_t)(a) & (alignment - 1))) /* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ +#define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* @@ -119,30 +116,31 @@ typedef enum malloc_init_e malloc_init_t; * provenance from the compiler. See the block-comment on the * definition of `byte_t` for more details. */ -#define ALIGNMENT_ADDR2CEILING(a, alignment) \ - ((void *)(((byte_t *)(a)) + (((((uintptr_t)(a)) + \ - (alignment - 1)) & ((~(alignment)) + 1)) - ((uintptr_t)(a))))) +#define ALIGNMENT_ADDR2CEILING(a, alignment) \ + ((void *)(((byte_t *)(a)) \ + + (((((uintptr_t)(a)) + (alignment - 1)) & ((~(alignment)) + 1)) \ + - ((uintptr_t)(a))))) /* Declare a variable-length array. 
*/ #if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) -# ifdef _MSC_VER -# include -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include -# else -# include -# endif -# endif -# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include +# else +# include +# endif +# endif +# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) #else -# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] +# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] #endif -#define VARIABLE_ARRAY_SIZE_MAX 2048 -#define VARIABLE_ARRAY(type, name, count) \ - assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ +#define VARIABLE_ARRAY_SIZE_MAX 2048 +#define VARIABLE_ARRAY(type, name, count) \ + assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ VARIABLE_ARRAY_UNSAFE(type, name, count) #define CALLOC_MADVISE_THRESHOLD_DEFAULT (((size_t)1) << 23) /* 8 MB */ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index ce9c8689..7cee6752 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -6,20 +6,20 @@ #include "jemalloc/internal/hook.h" void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, - size_t usize_max, bool zero); +void *large_palloc( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, + size_t usize_max, bool zero); void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, 
hook_ralloc_args_t *hook_args); -void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); -void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); -void large_dalloc(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); +void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); -void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, - bool reset_recent); +void large_prof_info_get( + tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 062dedbf..46aba8ff 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -30,33 +30,34 @@ struct locked_zu_s { }; #ifndef JEMALLOC_ATOMIC_U64 -# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; -# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ - malloc_mutex_init(&(mu), name, rank, rank_mode) -# define LOCKEDINT_MTX(mtx) (&(mtx)) -# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) -# define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu)) -# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ - malloc_mutex_postfork_parent(tsdn, &(mu)) -# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ - malloc_mutex_postfork_child(tsdn, &(mu)) +# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ + malloc_mutex_init(&(mu), name, rank, rank_mode) +# define LOCKEDINT_MTX(mtx) (&(mtx)) +# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +# 
define LOCKEDINT_MTX_PREFORK(tsdn, mu) \ + malloc_mutex_prefork(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ + malloc_mutex_postfork_parent(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ + malloc_mutex_postfork_child(tsdn, &(mu)) #else -# define LOCKEDINT_MTX_DECLARE(name) -# define LOCKEDINT_MTX(mtx) NULL -# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false -# define LOCKEDINT_MTX_LOCK(tsdn, mu) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) -# define LOCKEDINT_MTX_PREFORK(tsdn, mu) -# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) -# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) +# define LOCKEDINT_MTX_DECLARE(name) +# define LOCKEDINT_MTX(mtx) NULL +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif #ifdef JEMALLOC_ATOMIC_U64 -# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) #else -# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ - malloc_mutex_assert_owner(tsdn, (mtx)) +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ + malloc_mutex_assert_owner(tsdn, (mtx)) #endif static inline uint64_t @@ -70,8 +71,7 @@ locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { } static inline void -locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, - uint64_t x) { +locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); @@ -81,8 +81,7 @@ locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, } static inline void -locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, - uint64_t x) { +locked_dec_u64(tsdn_t 
*tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); @@ -99,7 +98,7 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, const uint64_t x, const uint64_t modulus) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); uint64_t before, after; - bool overflow; + bool overflow; #ifdef JEMALLOC_ATOMIC_U64 before = atomic_load_u64(&p->val, ATOMIC_RELAXED); do { @@ -109,8 +108,8 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, if (overflow) { after %= modulus; } - } while (!atomic_compare_exchange_weak_u64(&p->val, &before, after, - ATOMIC_RELAXED, ATOMIC_RELAXED)); + } while (!atomic_compare_exchange_weak_u64( + &p->val, &before, after, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else before = p->val; after = before + x; @@ -167,8 +166,7 @@ locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { } static inline void -locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, - size_t x) { +locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); @@ -179,8 +177,7 @@ locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, } static inline void -locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, - size_t x) { +locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 7b074abd..f213beda 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -7,9 +7,9 @@ #include "jemalloc/internal/mutex.h" #ifdef JEMALLOC_LOG -# define JEMALLOC_LOG_VAR_BUFSIZE 1000 +# define 
JEMALLOC_LOG_VAR_BUFSIZE 1000 #else -# define JEMALLOC_LOG_VAR_BUFSIZE 1 +# define JEMALLOC_LOG_VAR_BUFSIZE 1 #endif #define JEMALLOC_LOG_BUFSIZE 4096 @@ -36,7 +36,7 @@ * statements. */ -extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; extern atomic_b_t log_init_done; typedef struct log_var_s log_var_t; @@ -45,7 +45,7 @@ struct log_var_s { * Lowest bit is "inited", second lowest is "enabled". Putting them in * a single word lets us avoid any fences on weak architectures. */ - atomic_u_t state; + atomic_u_t state; const char *name; }; @@ -53,7 +53,8 @@ struct log_var_s { #define LOG_INITIALIZED_NOT_ENABLED 1U #define LOG_ENABLED 2U -#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} +#define LOG_VAR_INIT(name_str) \ + { ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str } /* * Returns the value we should assume for state (which is not necessarily @@ -63,21 +64,21 @@ struct log_var_s { unsigned log_var_update_state(log_var_t *log_var); /* We factor out the metadata management to allow us to test more easily. */ -#define log_do_begin(log_var) \ -if (config_log) { \ - unsigned log_state = atomic_load_u(&(log_var).state, \ - ATOMIC_RELAXED); \ - if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ - log_state = log_var_update_state(&(log_var)); \ - assert(log_state != LOG_NOT_INITIALIZED); \ - } \ - if (log_state == LOG_ENABLED) { \ - { - /* User code executes here. */ -#define log_do_end(log_var) \ - } \ - } \ -} +#define log_do_begin(log_var) \ + if (config_log) { \ + unsigned log_state = atomic_load_u( \ + &(log_var).state, ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { +/* User code executes here. 
*/ +#define log_do_end(log_var) \ + } \ + } \ + } /* * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during @@ -88,28 +89,29 @@ if (config_log) { \ */ static inline void log_impl_varargs(const char *name, ...) { - char buf[JEMALLOC_LOG_BUFSIZE]; + char buf[JEMALLOC_LOG_BUFSIZE]; va_list ap; va_start(ap, name); const char *format = va_arg(ap, const char *); - size_t dst_offset = 0; + size_t dst_offset = 0; dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); - dst_offset += malloc_vsnprintf(buf + dst_offset, - JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); - malloc_snprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + dst_offset += malloc_vsnprintf( + buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + malloc_snprintf( + buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); va_end(ap); malloc_write(buf); } /* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ -#define LOG(log_var_str, ...) \ -do { \ - static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ - log_do_begin(log_var) \ - log_impl_varargs((log_var).name, __VA_ARGS__); \ - log_do_end(log_var) \ -} while (0) +#define LOG(log_var_str, ...) 
\ + do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ + log_do_begin(log_var) \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ + } while (0) #endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 9c7c6ec2..0f82f678 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -5,64 +5,63 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" #else -# include -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR +# include +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR #endif /* Size of stack-allocated buffer passed to buferror(). 
*/ -#define BUFERROR_BUF 64 +#define BUFERROR_BUF 64 /* * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be * large enough for all possible uses within jemalloc. */ -#define MALLOC_PRINTF_BUFSIZE 4096 +#define MALLOC_PRINTF_BUFSIZE 4096 write_cb_t wrtmessage; -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, - int base); +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax( + const char *restrict nptr, char **restrict endptr, int base); void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); /* * The caller can set write_cb to null to choose to print with the * je_malloc_message hook. */ -void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, - va_list ap); +void malloc_vcprintf( + write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap); void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); @@ -81,10 +80,10 @@ malloc_write_fd_syscall(int fd, const void *buf, size_t count) { long result = syscall(SYS_write, fd, buf, count); #else ssize_t result = (ssize_t)write(fd, buf, -#ifdef _WIN32 +# ifdef _WIN32 (unsigned int) -#endif - count); +# endif + count); #endif return (ssize_t)result; } @@ -110,10 +109,10 @@ malloc_read_fd_syscall(int fd, void *buf, size_t count) { long result = syscall(SYS_read, fd, buf, count); #else ssize_t result = read(fd, buf, -#ifdef _WIN32 +# ifdef _WIN32 (unsigned int) -#endif - count); +# endif + count); #endif return (ssize_t)result; } @@ -122,8 +121,8 @@ static inline ssize_t malloc_read_fd(int fd, void *buf, size_t count) { size_t bytes_read = 0; do { - ssize_t result = malloc_read_fd_syscall(fd, - &((byte_t *)buf)[bytes_read], count - bytes_read); + ssize_t result = malloc_read_fd_syscall( + fd, &((byte_t *)buf)[bytes_read], count - bytes_read); if (result < 0) { return result; } else if (result == 0) { @@ -134,7 +133,8 @@ malloc_read_fd(int fd, void *buf, size_t count) { return bytes_read; } -static inline int malloc_open(const char *path, int flags) { +static inline int +malloc_open(const char *path, int flags) { int fd; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, path, flags); @@ -146,7 +146,8 @@ static inline int malloc_open(const char *path, int flags) { return fd; } -static inline int malloc_close(int fd) { +static inline int +malloc_close(int fd) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) return (int)syscall(SYS_close, fd); #else @@ -154,11 +155,12 @@ static inline int malloc_close(int fd) { #endif } -static inline off_t malloc_lseek(int fd, off_t offset, int whence) { +static inline off_t +malloc_lseek(int fd, off_t offset, int whence) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek) - return (off_t)syscall(SYS_lseek, fd, offset, whence); + return (off_t)syscall(SYS_lseek, fd, offset, whence); #else - return lseek(fd, offset, 
whence); + return lseek(fd, offset, whence); #endif } diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index db2bdf37..943c7928 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -31,7 +31,7 @@ struct malloc_mutex_s { * avoid prefetching a modified cacheline (for the * unlocking thread). */ - mutex_prof_data_t prof_data; + mutex_prof_data_t prof_data; /* * Hint flag to avoid exclusive cache line contention * during spin waiting. Placed along with prof_data @@ -39,20 +39,20 @@ struct malloc_mutex_s { * Modified by the lock owner only (after acquired, and * before release), and may be read by other threads. */ - atomic_b_t locked; + atomic_b_t locked; #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; -# else - CRITICAL_SECTION lock; -# endif +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; + os_unfair_lock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; #else - pthread_mutex_t lock; + pthread_mutex_t lock; #endif }; /* @@ -62,82 +62,118 @@ struct malloc_mutex_s { * memory cost. 
*/ #if !defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; #if defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 -# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) -# else -# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) -# endif +# if _WIN32_WINNT >= 0x0600 +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) \ + ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) \ + (!TryAcquireSRWLockExclusive(&(m)->lock)) +# else +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) \ + (!TryEnterCriticalSection(&(m)->lock)) +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) #else -# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define 
MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -#define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ - ATOMIC_INIT(0), 0, NULL, 0} +#define LOCK_PROF_DATA_INITIALIZER \ + { \ + NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0 \ + } #ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER +# define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# if defined(JEMALLOC_DEBUG) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# if (defined(JEMALLOC_DEBUG)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# if (defined(JEMALLOC_DEBUG)) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER( \ + "mutex", 
WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# if defined(JEMALLOC_DEBUG) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #endif #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else -# undef isthreaded /* Undo private_namespace.h definition. */ -# define isthreaded true +# undef isthreaded /* Undo private_namespace.h definition. 
*/ +# define isthreaded true #endif bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, @@ -214,12 +250,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { if (sum->max_n_thds < data->max_n_thds) { sum->max_n_thds = data->max_n_thds; } - uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, - ATOMIC_RELAXED); - uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( - &data->n_waiting_thds, ATOMIC_RELAXED); - atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, - ATOMIC_RELAXED); + uint32_t cur_n_waiting_thds = atomic_load_u32( + &sum->n_waiting_thds, ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + + atomic_load_u32(&data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32( + &sum->n_waiting_thds, new_n_waiting_thds, ATOMIC_RELAXED); sum->n_owner_switches += data->n_owner_switches; sum->n_lock_ops += data->n_lock_ops; } @@ -274,16 +310,16 @@ malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) { /* Copy the prof data from mutex for processing. */ static inline void -malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_read( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); malloc_mutex_prof_copy(data, &mutex->prof_data); } static inline void -malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_accum( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { mutex_prof_data_t *source = &mutex->prof_data; /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); @@ -305,8 +341,8 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, /* Compare the prof data and update to the maximum. 
*/ static inline void -malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_max_update( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { mutex_prof_data_t *source = &mutex->prof_data; /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 14e4340b..572200f3 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -6,76 +6,76 @@ #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/tsd_types.h" -#define MUTEX_PROF_GLOBAL_MUTEXES \ - OP(background_thread) \ - OP(max_per_bg_thd) \ - OP(ctl) \ - OP(prof) \ - OP(prof_thds_data) \ - OP(prof_dump) \ - OP(prof_recent_alloc) \ - OP(prof_recent_dump) \ - OP(prof_stats) +#define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(background_thread) \ + OP(max_per_bg_thd) \ + OP(ctl) \ + OP(prof) \ + OP(prof_thds_data) \ + OP(prof_dump) \ + OP(prof_recent_alloc) \ + OP(prof_recent_dump) \ + OP(prof_stats) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, MUTEX_PROF_GLOBAL_MUTEXES #undef OP - mutex_prof_num_global_mutexes + mutex_prof_num_global_mutexes } mutex_prof_global_ind_t; -#define MUTEX_PROF_ARENA_MUTEXES \ - OP(large) \ - OP(extent_avail) \ - OP(extents_dirty) \ - OP(extents_muzzy) \ - OP(extents_retained) \ - OP(decay_dirty) \ - OP(decay_muzzy) \ - OP(base) \ - OP(tcache_list) \ - OP(hpa_shard) \ - OP(hpa_shard_grow) \ - OP(hpa_sec) +#define MUTEX_PROF_ARENA_MUTEXES \ + OP(large) \ + OP(extent_avail) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(base) \ + OP(tcache_list) \ + OP(hpa_shard) \ + OP(hpa_shard_grow) \ + OP(hpa_sec) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, MUTEX_PROF_ARENA_MUTEXES #undef OP - mutex_prof_num_arena_mutexes + mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; /* * The 
forth parameter is a boolean value that is true for derived rate counters * and false for real ones. */ -#define MUTEX_PROF_UINT64_COUNTERS \ - OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ - OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ - OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ - OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ - OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ - OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ - OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \ - OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ - OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ - OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ - OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ + OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ + OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ + OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ + OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ + OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ + OP(num_owner_switch, uint64_t, "n_owner_switch", false, \ + num_owner_switch) \ + OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ + OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ + OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ + OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) -#define MUTEX_PROF_UINT32_COUNTERS \ - OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) -#define MUTEX_PROF_COUNTERS \ - MUTEX_PROF_UINT64_COUNTERS \ - MUTEX_PROF_UINT32_COUNTERS +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ + 
MUTEX_PROF_UINT32_COUNTERS #define OP(counter, type, human, derived, base_counter) mutex_counter_##counter, -#define COUNTER_ENUM(counter_list, t) \ - typedef enum { \ - counter_list \ - mutex_prof_num_##t##_counters \ - } mutex_prof_##t##_counter_ind_t; +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) @@ -89,17 +89,17 @@ typedef struct { * contention. We update them once we have the lock. */ /* Total time (in nano seconds) spent waiting on this mutex. */ - nstime_t tot_wait_time; + nstime_t tot_wait_time; /* Max time (in nano seconds) spent on a single lock operation. */ - nstime_t max_wait_time; + nstime_t max_wait_time; /* # of times have to wait for this mutex (after spinning). */ - uint64_t n_wait_times; + uint64_t n_wait_times; /* # of times acquired the mutex through local spinning. */ - uint64_t n_spin_acquired; + uint64_t n_spin_acquired; /* Max # of threads waiting for the mutex at the same time. */ - uint32_t max_n_thds; + uint32_t max_n_thds; /* Current # of threads waiting on the lock. Atomic synced. */ - atomic_u32_t n_waiting_thds; + atomic_u32_t n_waiting_thds; /* * Data touched on the fast path. These are modified right after we @@ -108,11 +108,11 @@ typedef struct { * cacheline. */ /* # of times the mutex holder is different than the previous one. */ - uint64_t n_owner_switches; + uint64_t n_owner_switches; /* Previous mutex holder, to facilitate n_owner_switches. */ - tsdn_t *prev_owner; + tsdn_t *prev_owner; /* # of lock() operations in total. 
*/ - uint64_t n_lock_ops; + uint64_t n_lock_ops; } mutex_prof_data_t; #endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 1f32df58..a10b2de1 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -9,9 +9,11 @@ #define NSTIME_MAGIC ((uint32_t)0xb8a9ce37) #ifdef JEMALLOC_DEBUG -# define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC} +# define NSTIME_ZERO_INITIALIZER \ + { 0, NSTIME_MAGIC } #else -# define NSTIME_ZERO_INITIALIZER {0} +# define NSTIME_ZERO_INITIALIZER \ + { 0 } #endif typedef struct { @@ -23,43 +25,40 @@ typedef struct { static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; -void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); uint64_t nstime_ms(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_iadd(nstime_t *time, uint64_t addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_isubtract(nstime_t *time, uint64_t subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_iadd(nstime_t *time, uint64_t addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_isubtract(nstime_t *time, uint64_t subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t 
multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); uint64_t nstime_ns_since(const nstime_t *past); uint64_t nstime_ms_since(const nstime_t *past); -typedef bool (nstime_monotonic_t)(void); +typedef bool(nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; -typedef void (nstime_update_t)(nstime_t *); +typedef void(nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; -typedef void (nstime_prof_update_t)(nstime_t *); +typedef void(nstime_prof_update_t)(nstime_t *); extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update; void nstime_init_update(nstime_t *time); void nstime_prof_init_update(nstime_t *time); -enum prof_time_res_e { - prof_time_res_default = 0, - prof_time_res_high = 1 -}; +enum prof_time_res_e { prof_time_res_default = 0, prof_time_res_high = 1 }; typedef enum prof_time_res_e prof_time_res_t; -extern prof_time_res_t opt_prof_time_res; +extern prof_time_res_t opt_prof_time_res; extern const char *const prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 75626738..3f2d10b0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -101,7 +101,7 @@ struct pa_shard_s { * these configurations to use many fewer arenas, and therefore have a * higher risk of hot locks. */ - sec_t hpa_sec; + sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. */ @@ -109,7 +109,7 @@ struct pa_shard_s { unsigned ind; - malloc_mutex_t *stats_mtx; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; /* The emap this shard is tied to. 
*/ @@ -121,8 +121,8 @@ struct pa_shard_s { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { - return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; + return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 + && pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; } static inline ehooks_t * @@ -186,10 +186,10 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, * (We could make generated_dirty the return value of course, but this is more * consistent with the shrink pathway and our error codes here). */ -void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool *deferred_work_generated); -bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, - ssize_t decay_ms, pac_purge_eagerness_t eagerness); +void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *deferred_work_generated); +bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); /* @@ -199,10 +199,10 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); * though, the arena, background thread, and PAC modules are tightly interwoven * in a way that's tricky to extricate, so we only do the HPA-specific parts. 
*/ -void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, - bool deferral_allowed); -void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); -void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_set_deferral_allowed( + tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed); +void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); /******************************************************************************/ @@ -228,8 +228,8 @@ size_t pa_shard_nactive(pa_shard_t *shard); size_t pa_shard_ndirty(pa_shard_t *shard); size_t pa_shard_nmuzzy(pa_shard_t *shard); -void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, - size_t *ndirty, size_t *nmuzzy); +void pa_shard_basic_stats_merge( + pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 243e97f3..a9edc19b 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -95,12 +95,12 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; - base_t *base; - emap_t *emap; + base_t *base; + emap_t *emap; edata_cache_t *edata_cache; /* The grow info for the retained ecache. */ - exp_grow_t exp_grow; + exp_grow_t exp_grow; malloc_mutex_t grow_mtx; /* Special allocator for guarded frequently reused extents. */ @@ -119,7 +119,7 @@ struct pac_s { decay_t decay_muzzy; /* muzzy --> retained */ malloc_mutex_t *stats_mtx; - pac_stats_t *stats; + pac_stats_t *stats; /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; @@ -141,8 +141,8 @@ struct pac_thp_s { bool thp_madvise; /* Below fields are protected by the lock. 
*/ malloc_mutex_t lock; - bool auto_thp_switched; - atomic_u_t n_thp_lazy; + bool auto_thp_switched; + atomic_u_t n_thp_lazy; /* * List that tracks HUGEPAGE aligned regions that're lazily hugified * in auto thp mode. @@ -195,11 +195,11 @@ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, * * Returns true on error (if the new limit is not valid). */ -bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit); +bool pac_retain_grow_limit_get_set( + tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); -bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, - ssize_t decay_ms, pac_purge_eagerness_t eagerness); +bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); void pac_reset(tsdn_t *tsdn, pac_t *pac); diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 366bc30b..31909934 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -5,27 +5,24 @@ #include "jemalloc/internal/jemalloc_internal_types.h" /* Actual operating system page size, detected during bootstrap, <= PAGE. */ -extern size_t os_page; +extern size_t os_page; /* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK -# undef PAGE_MASK +# undef PAGE_MASK #endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ALIGNMENT_ADDR2BASE(a, PAGE) +#define PAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, PAGE) /* Return the smallest pagesize multiple that is >= s. 
*/ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) +#define PAGE_CEILING(s) (((s) + PAGE_MASK) & ~PAGE_MASK) /* Return the largest pagesize multiple that is <=s. */ -#define PAGE_FLOOR(s) \ - ((s) & ~PAGE_MASK) +#define PAGE_FLOOR(s) ((s) & ~PAGE_MASK) /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ -#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) -#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) /* * Used to validate that the hugepage size is not unexpectedly high. The huge @@ -36,7 +33,7 @@ extern size_t os_page; #define HUGEPAGE_MAX_EXPECTED_SIZE ((size_t)(16U << 20)) #if LG_HUGEPAGE != 0 -# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) +# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) #else /* * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If @@ -45,19 +42,17 @@ extern size_t os_page; * that this value is at least 1. (We won't ever run in this degraded state; * hpa_supported() returns false in this case. */ -# define HUGEPAGE_PAGES 1 +# define HUGEPAGE_PAGES 1 #endif /* Return the huge page base address for the huge page containing address a. */ -#define HUGEPAGE_ADDR2BASE(a) \ - ALIGNMENT_ADDR2BASE(a, HUGEPAGE) +#define HUGEPAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, HUGEPAGE) /* Return the smallest pagesize multiple that is >= s. */ -#define HUGEPAGE_CEILING(s) \ - (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) +#define HUGEPAGE_CEILING(s) (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) /* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ #if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) -# define PAGES_CAN_PURGE_LAZY +# define PAGES_CAN_PURGE_LAZY #endif /* * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. @@ -68,10 +63,11 @@ extern size_t os_page; * next step after purging on Windows anyway, there's no point in adding such * complexity. 
*/ -#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \ - defined(JEMALLOC_MAPS_COALESCE)) -# define PAGES_CAN_PURGE_FORCED +#if !defined(_WIN32) \ + && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) \ + || defined(JEMALLOC_MAPS_COALESCE)) +# define PAGES_CAN_PURGE_FORCED #endif static const bool pages_can_purge_lazy = @@ -90,7 +86,7 @@ static const bool pages_can_purge_forced = ; #if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL) -# define PAGES_CAN_HUGIFY +# define PAGES_CAN_HUGIFY #endif static const bool pages_can_hugify = @@ -102,25 +98,25 @@ static const bool pages_can_hugify = ; typedef enum { - thp_mode_default = 0, /* Do not change hugepage settings. */ - thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ - thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ - thp_mode_names_limit = 3, /* Used for option processing. */ - thp_mode_not_supported = 3 /* No THP support detected. */ + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ } thp_mode_t; #define THP_MODE_DEFAULT thp_mode_default -extern thp_mode_t opt_thp; -extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. 
*/ extern const char *const thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); -void pages_unmap(void *addr, size_t size); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge_lazy(void *addr, size_t size); -bool pages_purge_forced(void *addr, size_t size); +void pages_unmap(void *addr, size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); bool pages_purge_process_madvise(void *vec, size_t ven_len, size_t total_bytes); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); @@ -128,7 +124,7 @@ bool pages_collapse(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); -void pages_set_thp_state (void *ptr, size_t size); +void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 557d30d1..1d924657 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -41,9 +41,8 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated) { +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { return self->alloc(tsdn, self, size, alignment, zero, guarded, frequent_reuse, deferred_work_generated); } @@ -66,13 +65,13 @@ pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static inline bool pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool 
*deferred_work_generated) { - return self->shrink(tsdn, self, edata, old_size, new_size, - deferred_work_generated); + return self->shrink( + tsdn, self, edata, old_size, new_size, deferred_work_generated); } static inline void -pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +pai_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { self->dalloc(tsdn, self, edata, deferred_work_generated); } diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h index 2a973cb8..599f1a02 100644 --- a/include/jemalloc/internal/peak.h +++ b/include/jemalloc/internal/peak.h @@ -14,7 +14,8 @@ struct peak_s { uint64_t adjustment; }; -#define PEAK_INITIALIZER {0, 0} +#define PEAK_INITIALIZER \ + { 0, 0 } static inline uint64_t peak_max(peak_t *peak) { diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index 1e339ff8..0d1f1627 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -20,7 +20,7 @@ /* Update the peak with current tsd state. */ void peak_event_update(tsd_t *tsd); /* Set current state to zero. 
*/ -void peak_event_zero(tsd_t *tsd); +void peak_event_zero(tsd_t *tsd); uint64_t peak_event_max(tsd_t *tsd); extern te_base_cb_t peak_te_handler; diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 05376004..803d2cbd 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -129,8 +129,7 @@ phn_prev_set(void *phn, void *prev, size_t offset) { } JEMALLOC_ALWAYS_INLINE void -phn_merge_ordered(void *phn0, void *phn1, size_t offset, - ph_cmp_t cmp) { +phn_merge_ordered(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { void *phn0child; assert(phn0 != NULL); @@ -361,15 +360,14 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { - phn_prev_set(phn_next_get(ph->root, offset), phn, - offset); + phn_prev_set(phn_next_get(ph->root, offset), phn, offset); } phn_prev_set(phn, ph->root, offset); phn_next_set(ph->root, phn, offset); ph->auxcount++; unsigned nmerges = ffs_zu(ph->auxcount); - bool done = false; + bool done = false; for (unsigned i = 0; i < nmerges && !done; i++) { done = ph_try_aux_merge_pair(ph, offset, cmp); } @@ -387,7 +385,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { ph->root = ph_merge_children(ph->root, offset, cmp); return ret; - } JEMALLOC_ALWAYS_INLINE void @@ -398,11 +395,11 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { return; } - void* prev = phn_prev_get(phn, offset); - void* next = phn_next_get(phn, offset); + void *prev = phn_prev_get(phn, offset); + void *next = phn_next_get(phn, offset); /* If we have children, then we integrate them back in the heap. 
*/ - void* replace = ph_merge_children(phn, offset, cmp); + void *replace = ph_merge_children(phn, offset, cmp); if (replace != NULL) { phn_next_set(replace, next, offset); if (next != NULL) { @@ -438,16 +435,16 @@ ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, * max_queue_size must be able to support max_visit_num, which means * the queue will not overflow before reaching max_visit_num. */ - assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); + assert(vars->max_queue_size >= (vars->max_visit_num + 1) / 2); } JEMALLOC_ALWAYS_INLINE void -ph_enumerate_queue_push(void *phn, void **bfs_queue, - ph_enumerate_vars_t *vars) { +ph_enumerate_queue_push( + void *phn, void **bfs_queue, ph_enumerate_vars_t *vars) { assert(vars->queue_size < vars->max_queue_size); bfs_queue[vars->rear] = phn; vars->rear = (vars->rear + 1) % vars->max_queue_size; - (vars->queue_size) ++; + (vars->queue_size)++; } JEMALLOC_ALWAYS_INLINE void * @@ -456,11 +453,10 @@ ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { assert(vars->queue_size <= vars->max_queue_size); void *ret = bfs_queue[vars->front]; vars->front = (vars->front + 1) % vars->max_queue_size; - (vars->queue_size) --; + (vars->queue_size)--; return ret; } - /* * The two functions below offer a solution to enumerate the pairing heap. 
* Whe enumerating, always call ph_enumerate_prepare first to prepare the queue @@ -478,13 +474,13 @@ ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, } JEMALLOC_ALWAYS_INLINE void * -ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, - ph_enumerate_vars_t *vars) { +ph_enumerate_next( + ph_t *ph, size_t offset, void **bfs_queue, ph_enumerate_vars_t *vars) { if (vars->queue_size == 0) { return NULL; } - (vars->visited_num) ++; + (vars->visited_num)++; if (vars->visited_num > vars->max_visit_num) { return NULL; } @@ -502,109 +498,97 @@ ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, return ret; } -#define ph_structs(a_prefix, a_type, a_max_queue_size) \ -typedef struct { \ - phn_link_t link; \ -} a_prefix##_link_t; \ - \ -typedef struct { \ - ph_t ph; \ -} a_prefix##_t; \ - \ -typedef struct { \ - void *bfs_queue[a_max_queue_size]; \ - ph_enumerate_vars_t vars; \ -} a_prefix##_enumerate_helper_t; - +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ + typedef struct { \ + phn_link_t link; \ + } a_prefix##_link_t; \ + \ + typedef struct { \ + ph_t ph; \ + } a_prefix##_t; \ + \ + typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ + } a_prefix##_enumerate_helper_t; /* * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). 
*/ -#define ph_proto(a_attr, a_prefix, a_type) \ - \ -a_attr void a_prefix##_new(a_prefix##_t *ph); \ -a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ -a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ - uint16_t max_queue_size); \ -a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper); +#define ph_proto(a_attr, a_prefix, a_type) \ + \ + a_attr void a_prefix##_new(a_prefix##_t *ph); \ + a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ + a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ + a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ + a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ + a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ + a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ + a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ + a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ + a_attr a_type *a_prefix##_enumerate_next( \ + a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ -#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ -JEMALLOC_ALWAYS_INLINE int \ -a_prefix##_ph_cmp(void *a, void *b) { \ - return a_cmp((a_type *)a, (a_type *)b); \ -} \ - \ -a_attr void \ -a_prefix##_new(a_prefix##_t *ph) { \ - ph_new(&ph->ph); \ -} \ - \ -a_attr bool \ -a_prefix##_empty(a_prefix##_t *ph) { \ - return ph_empty(&ph->ph); \ -} \ - \ -a_attr a_type * \ -a_prefix##_first(a_prefix##_t *ph) { \ - return ph_first(&ph->ph, offsetof(a_type, a_field), \ - &a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_any(a_prefix##_t *ph) { \ - return ph_any(&ph->ph, offsetof(a_type, a_field)); \ -} \ - \ -a_attr void \ -a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ - ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_remove_first(a_prefix##_t *ph) { \ - return ph_remove_first(&ph->ph, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr void \ -a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ - ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_remove_any(a_prefix##_t *ph) { \ - a_type *ret = a_prefix##_any(ph); \ - if (ret != NULL) { \ - a_prefix##_remove(ph, ret); \ - } \ - return ret; \ -} \ - \ -a_attr void \ -a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ - uint16_t max_queue_size) { \ - ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ - max_visit_num, max_queue_size); \ -} \ - \ -a_attr a_type * \ -a_prefix##_enumerate_next(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper) { \ - return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ - helper->bfs_queue, &helper->vars); \ -} +#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ + JEMALLOC_ALWAYS_INLINE int a_prefix##_ph_cmp(void *a, void *b) { \ + return a_cmp((a_type *)a, (a_type *)b); \ + } \ + \ + a_attr void 
a_prefix##_new(a_prefix##_t *ph) { \ + ph_new(&ph->ph); \ + } \ + \ + a_attr bool a_prefix##_empty(a_prefix##_t *ph) { \ + return ph_empty(&ph->ph); \ + } \ + \ + a_attr a_type *a_prefix##_first(a_prefix##_t *ph) { \ + return ph_first( \ + &ph->ph, offsetof(a_type, a_field), &a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_any(a_prefix##_t *ph) { \ + return ph_any(&ph->ph, offsetof(a_type, a_field)); \ + } \ + \ + a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph) { \ + return ph_remove_first( \ + &ph->ph, offsetof(a_type, a_field), a_prefix##_ph_cmp); \ + } \ + \ + a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ + ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph) { \ + a_type *ret = a_prefix##_any(ph); \ + if (ret != NULL) { \ + a_prefix##_remove(ph, ret); \ + } \ + return ret; \ + } \ + \ + a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, \ + &helper->vars, max_visit_num, max_queue_size); \ + } \ + \ + a_attr a_type *a_prefix##_enumerate_next( \ + a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ + } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 81060d32..04049519 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -26,11 +26,11 @@ /******************************************************************************/ /* INTERNAL DEFINITIONS -- IGNORE */ 
/******************************************************************************/ -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { @@ -49,7 +49,7 @@ prng_state_next_zu(size_t state) { #elif LG_SIZEOF_PTR == 3 return (state * PRNG_A_64) + PRNG_C_64; #else -#error Unsupported pointer size +# error Unsupported pointer size #endif } diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 43e8d7e7..0af5835c 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -17,21 +17,21 @@ extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); -bool prof_data_init(tsd_t *tsd); +bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_unbias_map_init(void); +int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); +void prof_unbias_map_init(void); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, +prof_tdata_t *prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); 
+void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -void prof_cnt_all(prof_cnt_t *cnt_all); +void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 7d962522..e41e30a0 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,21 +7,22 @@ #include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event_registry.h" -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern size_t opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern size_t + opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. 
*/ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ +/* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PATH_MAX + #endif @@ -57,19 +58,19 @@ extern size_t lg_prof_sample; extern bool prof_booted; -void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); prof_backtrace_hook_t prof_backtrace_hook_get(void); -void prof_dump_hook_set(prof_dump_hook_t hook); +void prof_dump_hook_set(prof_dump_hook_t hook); prof_dump_hook_t prof_dump_hook_get(void); -void prof_sample_hook_set(prof_sample_hook_t hook); +void prof_sample_hook_set(prof_sample_hook_t hook); prof_sample_hook_t prof_sample_hook_get(void); -void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); -void prof_threshold_hook_set(prof_threshold_hook_t hook); +void prof_threshold_hook_set(prof_threshold_hook_t hook); prof_threshold_hook_t prof_threshold_hook_get(void); /* Functions only accessed in prof_inlines.h */ @@ -77,33 +78,33 @@ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, - size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_info_t *prof_info); +void prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); 
-void prof_gdump(tsdn_t *tsdn); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd, base_t *base); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd, base_t *base); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); @@ -130,8 +131,8 @@ uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); */ JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, - size_t *surplus) { +te_prof_sample_event_lookahead_surplus( + tsd_t 
*tsd, size_t usize, size_t *surplus) { if (surplus != NULL) { /* * This is a dead store: the surplus will be overwritten before @@ -146,8 +147,8 @@ te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, return false; } /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); if (accumbytes < sample_wait) { return false; diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 2f3a81af..69dfaabf 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -21,7 +21,8 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); typedef void (*prof_dump_hook_t)(const char *filename); /* ptr, size, backtrace vector, backtrace vector length, usize */ -typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrace, unsigned backtrace_length, size_t usize); +typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, + void **backtrace, unsigned backtrace_length, size_t usize); /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); @@ -29,6 +30,7 @@ typedef void (*prof_sample_free_hook_t)(const void *, size_t); /* * A callback hook that notifies when an allocation threshold has been crossed. 
*/ -typedef void (*prof_threshold_hook_t)(uint64_t alloc, uint64_t dealloc, uint64_t peak); +typedef void (*prof_threshold_hook_t)( + uint64_t alloc, uint64_t dealloc, uint64_t peak); #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 75300ee4..4a36bd7a 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -164,8 +164,8 @@ JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { prof_tctx_t *ret; - if (!prof_active || - likely(prof_sample_should_skip(tsd, sample_event))) { + if (!prof_active + || likely(prof_sample_should_skip(tsd, sample_event))) { ret = PROF_TCTX_SENTINEL; } else { ret = prof_tctx_create(tsd); @@ -242,8 +242,8 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_ptr, old_usize, - old_prof_info); + prof_free_sampled_object( + tsd, old_ptr, old_usize, old_prof_info); } } @@ -254,9 +254,10 @@ prof_sample_align(size_t usize, size_t orig_align) { * w/o metadata lookup. */ assert(opt_prof); - return (orig_align < PROF_SAMPLE_ALIGNMENT && - (sz_can_use_slab(usize) || opt_cache_oblivious)) ? - PROF_SAMPLE_ALIGNMENT : orig_align; + return (orig_align < PROF_SAMPLE_ALIGNMENT + && (sz_can_use_slab(usize) || opt_cache_oblivious)) + ? 
PROF_SAMPLE_ALIGNMENT + : orig_align; } JEMALLOC_ALWAYS_INLINE bool @@ -271,8 +272,8 @@ prof_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize, - emap_alloc_ctx_t *alloc_ctx) { +prof_free( + tsd_t *tsd, const void *ptr, size_t usize, emap_alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index 0b1271c8..d9b97dc8 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -13,9 +13,9 @@ bool prof_log_init(tsd_t *tsdn); size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); size_t prof_log_thr_count(void); -bool prof_log_is_logging(void); -bool prof_log_rep_check(void); -void prof_log_dummy_set(bool new_value); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 084a549d..d38b15ea 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -10,29 +10,29 @@ struct prof_bt_s { /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; + void **vec; + unsigned len; }; #ifdef JEMALLOC_PROF_LIBGCC /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ typedef struct { - void **vec; - unsigned *len; - unsigned max; + void **vec; + unsigned *len; + unsigned max; } prof_unwind_data_t; #endif struct prof_cnt_s { /* Profiling counters. 
*/ - uint64_t curobjs; - uint64_t curobjs_shifted_unbiased; - uint64_t curbytes; - uint64_t curbytes_unbiased; - uint64_t accumobjs; - uint64_t accumobjs_shifted_unbiased; - uint64_t accumbytes; - uint64_t accumbytes_unbiased; + uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; + uint64_t curbytes; + uint64_t curbytes_unbiased; + uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; + uint64_t accumbytes; + uint64_t accumbytes_unbiased; }; typedef enum { @@ -44,26 +44,26 @@ typedef enum { struct prof_tctx_s { /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; + prof_tdata_t *tdata; /* * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be * defunct during teardown. */ - uint64_t thr_uid; - uint64_t thr_discrim; + uint64_t thr_uid; + uint64_t thr_discrim; /* * Reference count of how many times this tctx object is referenced in * recent allocation / deallocation records, protected by tdata->lock. */ - uint64_t recent_count; + uint64_t recent_count; /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; + prof_cnt_t cnts; /* Associated global context. */ - prof_gctx_t *gctx; + prof_gctx_t *gctx; /* * UID that distinguishes multiple tctx's created by the same thread, @@ -78,40 +78,40 @@ struct prof_tctx_s { * threshold can be hit again before the first consumer finishes * executing prof_tctx_destroy(). */ - uint64_t tctx_uid; + uint64_t tctx_uid; /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; + rb_node(prof_tctx_t) tctx_link; /* * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents * sample vs destroy race. */ - bool prepared; + bool prepared; /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; + prof_tctx_state_t state; /* * Copy of cnts snapshotted during early dump phase, protected by * dump_mtx. 
*/ - prof_cnt_t dump_cnts; + prof_cnt_t dump_cnts; }; typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { /* Time when the allocation was made. */ - nstime_t alloc_time; + nstime_t alloc_time; /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; + prof_tctx_t *alloc_tctx; /* Allocation request size. */ - size_t alloc_size; + size_t alloc_size; }; struct prof_gctx_s { /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; + malloc_mutex_t *lock; /* * Number of threads that currently cause this gctx to be in a state of @@ -123,48 +123,48 @@ struct prof_gctx_s { * nlimbo must be 1 (single destroyer) in order to safely destroy the * gctx. */ - unsigned nlimbo; + unsigned nlimbo; /* * Tree of profile counters, one for each thread that has allocated in * this context. */ - prof_tctx_tree_t tctxs; + prof_tctx_tree_t tctxs; /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; + rb_node(prof_gctx_t) dump_link; /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; + prof_cnt_t cnt_summed; /* Associated backtrace. */ - prof_bt_t bt; + prof_bt_t bt; /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; + void *vec[1]; }; typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; struct prof_tdata_s { - malloc_mutex_t *lock; + malloc_mutex_t *lock; /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; + uint64_t thr_uid; /* * Monotonically increasing discriminator among tdata structures * associated with the same thr_uid. */ - uint64_t thr_discrim; + uint64_t thr_discrim; - rb_node(prof_tdata_t) tdata_link; + rb_node(prof_tdata_t) tdata_link; /* * Counter used to initialize prof_tctx_t's tctx_uid. No locking is * necessary when incrementing this field, because only one thread ever * does so. */ - uint64_t tctx_uid_next; + uint64_t tctx_uid_next; /* * Hash of (prof_bt_t *)-->(prof_tctx_t *). 
Each thread tracks @@ -172,15 +172,15 @@ struct prof_tdata_s { * associated with thread-specific prof_tctx_t objects. Other threads * may write to prof_tctx_t contents when freeing associated objects. */ - ckh_t bt2tctx; + ckh_t bt2tctx; /* Included in heap profile dumps if has content. */ - char thread_name[PROF_THREAD_NAME_MAX_LEN]; + char thread_name[PROF_THREAD_NAME_MAX_LEN]; /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; + bool enq; + bool enq_idump; + bool enq_gdump; /* * Set to true during an early dump phase for tdata's which are @@ -188,22 +188,22 @@ struct prof_tdata_s { * to false so that they aren't accidentally included in later dump * phases. */ - bool dumping; + bool dumping; /* * True if profiling is active for this tdata's thread * (thread.prof.active mallctl). */ - bool active; + bool active; - bool attached; - bool expired; + bool attached; + bool expired; /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; + prof_cnt_t cnt_summed; /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; + void **vec; }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; @@ -212,9 +212,9 @@ struct prof_recent_s { nstime_t dalloc_time; ql_elm(prof_recent_t) link; - size_t size; - size_t usize; - atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ + size_t size; + size_t usize; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *alloc_tctx; prof_tctx_t *dalloc_tctx; }; diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 42284b38..0745b991 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,30 +6,30 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t prof_dump_filename_mtx; -extern base_t *prof_base; +extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_hooks_init(void); void prof_unwind_init(void); void prof_sys_thread_name_fetch(tsd_t *tsd); -int prof_getpid(void); +int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); -int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); +int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); /* Used in unit tests. 
*/ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +typedef int(prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; -typedef int (prof_dump_open_file_t)(const char *, int); +typedef int(prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; -typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +typedef ssize_t(prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(void); +typedef int(prof_dump_open_maps_t)(void); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index a27f7fb3..7468885e 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -1,22 +1,22 @@ #ifndef JEMALLOC_INTERNAL_PROF_TYPES_H #define JEMALLOC_INTERNAL_PROF_TYPES_H -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_info_s prof_info_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; typedef struct prof_recent_s prof_recent_t; /* Option defaults. 
*/ #ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" +# define PROF_PREFIX_DEFAULT "jeprof" #else -# define PROF_PREFIX_DEFAULT "" +# define PROF_PREFIX_DEFAULT "" #endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 /* * Hard limit on stack backtrace depth. The version of prof_backtrace() that @@ -24,54 +24,54 @@ typedef struct prof_recent_s prof_recent_t; * of backtrace frame handlers, and should be kept in sync with this setting. */ #ifdef JEMALLOC_PROF_GCC -# define PROF_BT_MAX_LIMIT 256 +# define PROF_BT_MAX_LIMIT 256 #else -# define PROF_BT_MAX_LIMIT UINT_MAX +# define PROF_BT_MAX_LIMIT UINT_MAX #endif -#define PROF_BT_MAX_DEFAULT 128 +#define PROF_BT_MAX_DEFAULT 128 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ #ifndef JEMALLOC_PROF /* Minimize memory bloat for non-prof builds. */ -# define PROF_DUMP_BUFSIZE 1 +# define PROF_DUMP_BUFSIZE 1 #elif defined(JEMALLOC_DEBUG) /* Use a small buffer size in debug build, mainly to facilitate testing. */ -# define PROF_DUMP_BUFSIZE 16 +# define PROF_DUMP_BUFSIZE 16 #else -# define PROF_DUMP_BUFSIZE 65536 +# define PROF_DUMP_BUFSIZE 65536 #endif /* Size of size class related tables */ #ifdef JEMALLOC_PROF -# define PROF_SC_NSIZES SC_NSIZES +# define PROF_SC_NSIZES SC_NSIZES #else /* Minimize memory bloat for non-prof builds. */ -# define PROF_SC_NSIZES 1 +# define PROF_SC_NSIZES 1 #endif /* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 +#define PROF_PRINTF_BUFSIZE 128 /* * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NCTX_LOCKS 1024 +#define PROF_NCTX_LOCKS 1024 /* * Number of mutexes shared among all tdata's. 
No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NTDATA_LOCKS 256 +#define PROF_NTDATA_LOCKS 256 /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF -#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) #else -#define PROF_DUMP_FILENAME_LEN 1 +# define PROF_DUMP_FILENAME_LEN 1 #endif /* Default number of recent allocations to record. */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index ea608213..3fdecaed 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -90,7 +90,7 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ - fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; + fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index ebe69988..9c1776a4 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -28,33 +28,36 @@ */ /* List definitions. */ -#define ql_head(a_type) \ -struct { \ - a_type *qlh_first; \ -} +#define ql_head(a_type) \ + struct { \ + a_type *qlh_first; \ + } /* Static initializer for an empty list. */ -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) \ + { NULL } /* The field definition. */ -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* A pointer to the first element in the list, or NULL if the list is empty. */ #define ql_first(a_head) ((a_head)->qlh_first) /* Dynamically initializes a list. 
*/ -#define ql_new(a_head) do { \ - ql_first(a_head) = NULL; \ -} while (0) +#define ql_new(a_head) \ + do { \ + ql_first(a_head) = NULL; \ + } while (0) /* * Sets dest to be the contents of src (overwriting any elements there), leaving * src empty. */ -#define ql_move(a_head_dest, a_head_src) do { \ - ql_first(a_head_dest) = ql_first(a_head_src); \ - ql_new(a_head_src); \ -} while (0) +#define ql_move(a_head_dest, a_head_src) \ + do { \ + ql_first(a_head_dest) = ql_first(a_head_src); \ + ql_new(a_head_src); \ + } while (0) /* True if the list is empty, otherwise false. */ #define ql_empty(a_head) (ql_first(a_head) == NULL) @@ -68,85 +71,91 @@ struct { \ /* * Obtains the last item in the list. */ -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field)) /* * Gets a pointer to the next/prev element in the list. Trying to advance past * the end or retreat before the beginning of the list returns NULL. */ -#define ql_next(a_head, a_elm, a_field) \ - ((ql_last(a_head, a_field) != (a_elm)) \ - ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ - ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ - : NULL) +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) ? qr_next((a_elm), a_field) \ + : NULL) +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) : NULL) /* Inserts a_elm before a_qlelm in the list. */ -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ - qr_before_insert((a_qlelm), (a_elm), a_field); \ - if (ql_first(a_head) == (a_qlelm)) { \ - ql_first(a_head) = (a_elm); \ - } \ -} while (0) +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) \ + do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ + } while (0) /* Inserts a_elm after a_qlelm in the list. 
*/ -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) /* Inserts a_elm as the first item in the list. */ -#define ql_head_insert(a_head, a_elm, a_field) do { \ - if (!ql_empty(a_head)) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = (a_elm); \ -} while (0) +#define ql_head_insert(a_head, a_elm, a_field) \ + do { \ + if (!ql_empty(a_head)) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ + } while (0) /* Inserts a_elm as the last item in the list. */ -#define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (!ql_empty(a_head)) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = qr_next((a_elm), a_field); \ -} while (0) +#define ql_tail_insert(a_head, a_elm, a_field) \ + do { \ + if (!ql_empty(a_head)) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ + } while (0) /* * Given lists a = [a_1, ..., a_n] and [b_1, ..., b_n], results in: * a = [a1, ..., a_n, b_1, ..., b_n] and b = []. */ -#define ql_concat(a_head_a, a_head_b, a_field) do { \ - if (ql_empty(a_head_a)) { \ - ql_move(a_head_a, a_head_b); \ - } else if (!ql_empty(a_head_b)) { \ - qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ - a_field); \ - ql_new(a_head_b); \ - } \ -} while (0) +#define ql_concat(a_head_a, a_head_b, a_field) \ + do { \ + if (ql_empty(a_head_a)) { \ + ql_move(a_head_a, a_head_b); \ + } else if (!ql_empty(a_head_b)) { \ + qr_meld( \ + ql_first(a_head_a), ql_first(a_head_b), a_field); \ + ql_new(a_head_b); \ + } \ + } while (0) /* Removes a_elm from the list. 
*/ -#define ql_remove(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) == (a_elm)) { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ - } \ - if (ql_first(a_head) != (a_elm)) { \ - qr_remove((a_elm), a_field); \ - } else { \ - ql_new(a_head); \ - } \ -} while (0) +#define ql_remove(a_head, a_elm, a_field) \ + do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_new(a_head); \ + } \ + } while (0) /* Removes the first item in the list. */ -#define ql_head_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_first(a_head); \ - ql_remove((a_head), t, a_field); \ -} while (0) +#define ql_head_remove(a_head, a_type, a_field) \ + do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ + } while (0) /* Removes the last item in the list. */ -#define ql_tail_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_last(a_head, a_field); \ - ql_remove((a_head), t, a_field); \ -} while (0) +#define ql_tail_remove(a_head, a_type, a_field) \ + do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ + } while (0) /* * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...], @@ -155,14 +164,15 @@ struct { \ * and replaces b's contents with: * b = [a_n, a_n+1, ...] 
*/ -#define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ - if (ql_first(a_head_a) == (a_elm)) { \ - ql_move(a_head_b, a_head_a); \ - } else { \ - qr_split(ql_first(a_head_a), (a_elm), a_field); \ - ql_first(a_head_b) = (a_elm); \ - } \ -} while (0) +#define ql_split(a_head_a, a_elm, a_head_b, a_field) \ + do { \ + if (ql_first(a_head_a) == (a_elm)) { \ + ql_move(a_head_b, a_head_a); \ + } else { \ + qr_split(ql_first(a_head_a), (a_elm), a_field); \ + ql_first(a_head_b) = (a_elm); \ + } \ + } while (0) /* * An optimized version of: @@ -170,9 +180,10 @@ struct { \ * ql_remove((a_head), t, a_field); * ql_tail_insert((a_head), t, a_field); */ -#define ql_rotate(a_head, a_field) do { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ -} while (0) +#define ql_rotate(a_head, a_field) \ + do { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } while (0) /* * Helper macro to iterate over each element in a list in order, starting from @@ -189,10 +200,10 @@ struct { \ * } */ -#define ql_foreach(a_var, a_head, a_field) \ - qr_foreach((a_var), ql_first(a_head), a_field) +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach ((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) #endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index ece4f556..1bd61f38 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -17,21 +17,22 @@ */ /* Ring definitions. */ -#define qr(a_type) \ -struct { \ - a_type *qre_next; \ - a_type *qre_prev; \ -} +#define qr(a_type) \ + struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ + } /* * Initialize a qr link. Every link must be initialized before being used, even * if that initialization is going to be immediately overwritten (say, by being * passed into an insertion macro). 
*/ -#define qr_new(a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) +#define qr_new(a_qr, a_field) \ + do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ + } while (0) /* * Go forwards or backwards in the ring. Note that (the ring being circular), this @@ -58,26 +59,27 @@ struct { \ * * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ - (a_qr_a)->a_field.qre_prev; \ - (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ -} while (0) +#define qr_meld(a_qr_a, a_qr_b, a_field) \ + do { \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ + (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + } while (0) /* * Logically, this is just a meld. The intent, though, is that a_qrelm is a * single-element ring, so that "before" has a more obvious interpretation than * meld. */ -#define qr_before_insert(a_qrelm, a_qr, a_field) \ +#define qr_before_insert(a_qrelm, a_qr, a_field) \ qr_meld((a_qrelm), (a_qr), a_field) /* Ditto, but inserting after rather than before. */ -#define qr_after_insert(a_qrelm, a_qr, a_field) \ +#define qr_after_insert(a_qrelm, a_qr, a_field) \ qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) /* @@ -98,14 +100,13 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) +#define qr_split(a_qr_a, a_qr_b, a_field) qr_meld((a_qr_a), (a_qr_b), a_field) /* * Splits off a_qr from the rest of its ring, so that it becomes a * single-element ring. */ -#define qr_remove(a_qr, a_field) \ +#define qr_remove(a_qr, a_field) \ qr_split(qr_next(a_qr, a_field), (a_qr), a_field) /* @@ -121,20 +122,19 @@ struct { \ * return sum; * } */ -#define qr_foreach(var, a_qr, a_field) \ - for ((var) = (a_qr); \ - (var) != NULL; \ - (var) = (((var)->a_field.qre_next != (a_qr)) \ - ? (var)->a_field.qre_next : NULL)) +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next \ + : NULL)) /* * The same (and with the same usage) as qr_foreach, but in the opposite order, * ending with a_qr. */ -#define qr_reverse_foreach(var, a_qr, a_field) \ - for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ - (var) != NULL; \ - (var) = (((var) != (a_qr)) \ - ? (var)->a_field.qre_prev : NULL)) +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) ? (var)->a_field.qre_prev : NULL)) #endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index b4beb309..2f7c0466 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -6,82 +6,84 @@ * classes). 
*/ #ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# if defined(__aarch64__) || defined(_M_ARM64) -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __loongarch__ -# define LG_QUANTUM 4 -# endif -# ifdef __m68k__ -# define LG_QUANTUM 3 -# endif -# ifdef __mips__ -# if defined(__mips_n32) || defined(__mips_n64) -# define LG_QUANTUM 4 -# else -# define LG_QUANTUM 3 -# endif -# endif -# ifdef __nios2__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__ppc64__) -# define LG_QUANTUM 4 -# endif -# if defined(__riscv) || defined(__riscv__) -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ - defined(__SH4_SINGLE_ONLY__)) -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifdef __arc__ -# define LG_QUANTUM 3 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) \ + || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef 
__arm__ +# define LG_QUANTUM 3 +# endif +# if defined(__aarch64__) || defined(_M_ARM64) +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __loongarch__ +# define LG_QUANTUM 4 +# endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# if defined(__mips_n32) || defined(__mips_n64) +# define LG_QUANTUM 4 +# else +# define LG_QUANTUM 3 +# endif +# endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) \ + || defined(__ppc64__) +# define LG_QUANTUM 4 +# endif +# if defined(__riscv) || defined(__riscv__) +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# if (defined(__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) \ + || defined(__SH4_SINGLE_ONLY__)) +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifdef __arc__ +# define LG_QUANTUM 3 +# endif +# ifndef LG_QUANTUM +# error \ + "Unknown minimum alignment for architecture; specify via " +"--with-lg-quantum" +# endif #endif -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) /* Return the smallest quantum multiple that is >= a. 
*/ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) +#define QUANTUM_CEILING(a) (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) #endif /* JEMALLOC_INTERNAL_QUANTUM_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 235d548e..58510e4d 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -26,7 +26,7 @@ */ #ifndef __PGI -#define RB_COMPACT +# define RB_COMPACT #endif /* diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index f35368ae..07205958 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -18,48 +18,49 @@ */ /* Number of high insignificant bits. */ -#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR + 3)) - LG_VADDR) /* Number of low insigificant bits. */ #define RTREE_NLIB LG_PAGE /* Number of significant bits. */ #define RTREE_NSB (LG_VADDR - RTREE_NLIB) /* Number of levels in radix tree. */ #if RTREE_NSB <= 10 -# define RTREE_HEIGHT 1 +# define RTREE_HEIGHT 1 #elif RTREE_NSB <= 36 -# define RTREE_HEIGHT 2 +# define RTREE_HEIGHT 2 #elif RTREE_NSB <= 52 -# define RTREE_HEIGHT 3 +# define RTREE_HEIGHT 3 #else -# error Unsupported number of significant virtual address bits +# error Unsupported number of significant virtual address bits #endif /* Use compact leaf representation if virtual address encoding allows. */ #if RTREE_NHIB >= LG_CEIL(SC_NSIZES) -# define RTREE_LEAF_COMPACT +# define RTREE_LEAF_COMPACT #endif typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { - atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { - szind_t szind; - extent_state_t state; /* Mirrors edata->state. */ - bool is_head; /* Mirrors edata->is_head. */ - bool slab; + szind_t szind; + extent_state_t state; /* Mirrors edata->state. 
*/ + bool is_head; /* Mirrors edata->is_head. */ + bool slab; }; typedef struct rtree_contents_s rtree_contents_t; struct rtree_contents_s { - edata_t *edata; + edata_t *edata; rtree_metadata_t metadata; }; #define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH #define RTREE_LEAF_STATE_SHIFT 2 -#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) +#define RTREE_LEAF_STATE_MASK \ + MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT @@ -77,36 +78,36 @@ struct rtree_leaf_elm_s { * * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb */ - atomic_p_t le_bits; + atomic_p_t le_bits; #else - atomic_p_t le_edata; /* (edata_t *) */ + atomic_p_t le_edata; /* (edata_t *) */ /* * From high to low bits: szind (8 bits), state (4 bits), is_head, slab */ - atomic_u_t le_metadata; + atomic_u_t le_metadata; #endif }; typedef struct rtree_level_s rtree_level_t; struct rtree_level_s { /* Number of key bits distinguished by this level. */ - unsigned bits; + unsigned bits; /* * Cumulative number of key bits distinguished by traversing to * corresponding tree level. */ - unsigned cumbits; + unsigned cumbits; }; typedef struct rtree_s rtree_t; struct rtree_s { - base_t *base; - malloc_mutex_t init_lock; + base_t *base; + malloc_mutex_t init_lock; /* Number of elements based on rtree_levels[0].bits. 
*/ #if RTREE_HEIGHT > 1 - rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; + rtree_node_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)]; #else - rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; + rtree_leaf_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)]; #endif }; @@ -118,17 +119,17 @@ struct rtree_s { */ static const rtree_level_t rtree_levels[] = { #if RTREE_HEIGHT == 1 - {RTREE_NSB, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} #elif RTREE_HEIGHT == 2 - {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, - {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB / 2, RTREE_NHIB + RTREE_NSB / 2}, + {RTREE_NSB / 2 + RTREE_NSB % 2, RTREE_NHIB + RTREE_NSB} #elif RTREE_HEIGHT == 3 - {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, - {RTREE_NSB/3 + RTREE_NSB%3/2, - RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, - {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB / 3, RTREE_NHIB + RTREE_NSB / 3}, + {RTREE_NSB / 3 + RTREE_NSB % 3 / 2, + RTREE_NHIB + RTREE_NSB / 3 * 2 + RTREE_NSB % 3 / 2}, + {RTREE_NSB / 3 + RTREE_NSB % 3 - RTREE_NSB % 3 / 2, RTREE_NHIB + RTREE_NSB} #else -# error Unsupported rtree height +# error Unsupported rtree height #endif }; @@ -139,9 +140,9 @@ rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE unsigned rtree_leaf_maskbits(void) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT - 1].cumbits + - rtree_levels[RTREE_HEIGHT - 1].bits); return ptrbits - cumbits; } @@ -153,16 +154,16 @@ rtree_leafkey(uintptr_t key) { JEMALLOC_ALWAYS_INLINE size_t rtree_cache_direct_map(uintptr_t key) { - return (size_t)((key >> rtree_leaf_maskbits()) & - (RTREE_CTX_NCACHE - 1)); + return ( + size_t)((key >> rtree_leaf_maskbits()) & (RTREE_CTX_NCACHE - 1)); } JEMALLOC_ALWAYS_INLINE 
uintptr_t rtree_subkey(uintptr_t key, unsigned level) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = rtree_levels[level].cumbits; - unsigned shiftbits = ptrbits - cumbits; - unsigned maskbits = rtree_levels[level].bits; + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3); + unsigned cumbits = rtree_levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree_levels[level].bits; uintptr_t mask = (ZU(1) << maskbits) - 1; return ((key >> shiftbits) & mask); } @@ -178,12 +179,12 @@ rtree_subkey(uintptr_t key, unsigned level) { * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. */ -# ifdef RTREE_LEAF_COMPACT +#ifdef RTREE_LEAF_COMPACT JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { - return (uintptr_t)atomic_load_p(&elm->le_bits, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +rtree_leaf_elm_bits_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { + return (uintptr_t)atomic_load_p( + &elm->le_bits, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } JEMALLOC_ALWAYS_INLINE uintptr_t @@ -195,10 +196,10 @@ rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; - uintptr_t state_bits = (uintptr_t)contents.metadata.state << - RTREE_LEAF_STATE_SHIFT; - uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits | - slab_bits; + uintptr_t state_bits = (uintptr_t)contents.metadata.state + << RTREE_LEAF_STATE_SHIFT; + uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits + | slab_bits; assert((edata_bits & metadata_bits) == 0); return edata_bits | metadata_bits; @@ -212,13 +213,13 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.metadata.slab = (bool)(bits & 1); contents.metadata.is_head = (bool)(bits & (1 << 1)); - uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> - RTREE_LEAF_STATE_SHIFT; + uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) + >> RTREE_LEAF_STATE_SHIFT; assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); -# ifdef __aarch64__ +# ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set * the higher ones. Instead, the high bits get zeroed. @@ -228,49 +229,50 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t mask = high_bit_mask & low_bit_mask; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)(bits & mask); -# else +# else /* Restore sign-extended high bits, mask metadata bits. 
*/ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & low_bit_mask); -# endif + >> RTREE_NHIB) + & low_bit_mask); +# endif assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); return contents; } -# endif /* RTREE_LEAF_COMPACT */ +#endif /* RTREE_LEAF_COMPACT */ JEMALLOC_ALWAYS_INLINE rtree_contents_t -rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits); return contents; #else rtree_contents_t contents; - unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + unsigned metadata_bits = atomic_load_u( + &elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); - uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> - RTREE_LEAF_STATE_SHIFT; + uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) + >> RTREE_LEAF_STATE_SHIFT; assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; - contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + - RTREE_LEAF_STATE_WIDTH); + contents.metadata.szind = metadata_bits + >> (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH); - contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + contents.edata = (edata_t *)atomic_load_p( + &elm->le_edata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); return contents; #endif } JEMALLOC_ALWAYS_INLINE void -rtree_contents_encode(rtree_contents_t contents, void **bits, - unsigned *additional) { +rtree_contents_encode( + rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *bits = (void *)rtree_leaf_elm_bits_encode(contents); @@ -282,15 +284,15 @@ rtree_contents_encode(rtree_contents_t contents, void **bits, *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) - | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + - RTREE_LEAF_STATE_WIDTH)); + | ((unsigned)contents.metadata.szind + << (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH)); *bits = contents.edata; #endif } JEMALLOC_ALWAYS_INLINE void -rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, void *bits, unsigned additional) { +rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + void *bits, unsigned additional) { #ifdef RTREE_LEAF_COMPACT atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE); #else @@ -304,10 +306,10 @@ rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_contents_t contents) { +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + rtree_contents_t contents) { assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); - void *bits; + void *bits; unsigned additional; rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); @@ -348,7 +350,7 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_leaf_elm_t **elm) { - size_t slot 
= rtree_cache_direct_map(key); + size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); @@ -358,7 +360,7 @@ rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1); *elm = &leaf[subkey]; return false; @@ -370,7 +372,7 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, assert(key != 0); assert(!dependent || !init_missing); - size_t slot = rtree_cache_direct_map(key); + size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); @@ -378,39 +380,41 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1); return &leaf[subkey]; } /* * Search the L2 LRU cache. On hit, swap the matching element into the * slot in L1 cache, and move the position in L2 up by 1. */ -#define RTREE_CACHE_CHECK_L2(i) do { \ - if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ - rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ - assert(leaf != NULL); \ - if (i > 0) { \ - /* Bubble up by one. 
*/ \ - rtree_ctx->l2_cache[i].leafkey = \ - rtree_ctx->l2_cache[i - 1].leafkey; \ - rtree_ctx->l2_cache[i].leaf = \ - rtree_ctx->l2_cache[i - 1].leaf; \ - rtree_ctx->l2_cache[i - 1].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[i - 1].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } else { \ - rtree_ctx->l2_cache[0].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[0].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } \ - rtree_ctx->cache[slot].leafkey = leafkey; \ - rtree_ctx->cache[slot].leaf = leaf; \ - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ - return &leaf[subkey]; \ - } \ -} while (0) +#define RTREE_CACHE_CHECK_L2(i) \ + do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ + assert(leaf != NULL); \ + if (i > 0) { \ + /* Bubble up by one. */ \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey( \ + key, RTREE_HEIGHT - 1); \ + return &leaf[subkey]; \ + } \ + } while (0) /* Check the first cache entry. */ RTREE_CACHE_CHECK_L2(0); /* Search the remaining cache elements. 
*/ @@ -419,8 +423,8 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } #undef RTREE_CACHE_CHECK_L2 - return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, - dependent, init_missing); + return rtree_leaf_elm_lookup_hard( + tsdn, rtree, rtree_ctx, key, dependent, init_missing); } /* @@ -440,8 +444,8 @@ rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } static inline rtree_contents_t -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); @@ -449,21 +453,22 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } static inline rtree_metadata_t -rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_metadata_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); return rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).metadata; + /* dependent */ true) + .metadata; } /* * Returns true when the request cannot be fulfilled by fastpath. 
*/ static inline bool -rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, rtree_metadata_t *r_rtree_metadata) { +rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_metadata_t *r_rtree_metadata) { rtree_leaf_elm_t *elm; /* * Should check the bool return value (lookup success or not) instead of @@ -476,7 +481,8 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct } assert(elm != NULL); *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).metadata; + /* dependent */ true) + .metadata; return false; } @@ -490,22 +496,27 @@ rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, * are dependent w/o init_missing, assuming the range spans across at * most 2 rtree leaf nodes (each covers 1 GiB of vaddr). */ - void *bits; + void *bits; unsigned additional; rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_t *elm = NULL; /* Dead store. 
*/ for (uintptr_t addr = base; addr <= end; addr += PAGE) { - if (addr == base || - (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { - elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + if (addr == base + || (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + addr, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); } - assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, - /* dependent */ true, /* init_missing */ false)); - assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).edata != NULL); + assert(elm + == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false)); + assert(!clearing + || rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true) + .edata + != NULL); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); elm++; } @@ -533,13 +544,15 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, } static inline void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_clear( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); assert(rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).edata != NULL); + /* dependent */ true) + .edata + != NULL); rtree_contents_t contents; contents.edata = NULL; contents.metadata.szind = SC_NSIZES; diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 59f18570..4014dde0 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -25,7 +25,8 @@ /* Needed for initialization only. 
*/ #define RTREE_LEAFKEY_INVALID ((uintptr_t)1) -#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL} +#define RTREE_CTX_CACHE_ELM_INVALID \ + { RTREE_LEAFKEY_INVALID, NULL } #define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID #define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1 @@ -40,23 +41,27 @@ * Static initializer (to invalidate the cache entries) is required because the * free fastpath may access the rtree cache before a full tsd initialization. */ -#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \ - {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}} +#define RTREE_CTX_INITIALIZER \ + { \ + {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, { \ + RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2) \ + } \ + } typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; }; typedef struct rtree_ctx_s rtree_ctx_t; struct rtree_ctx_s { /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; /* L2 LRU cache. 
*/ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; }; void rtree_ctx_data_init(rtree_ctx_t *ctx); diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 194b7744..2b4b2d0e 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -7,8 +7,8 @@ #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, - size_t true_size, size_t input_size); +void safety_check_fail_sized_dealloc( + bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); typedef void (*safety_check_abort_hook_t)(const char *message); @@ -16,7 +16,7 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); -#define REDZONE_SIZE ((size_t) 32) +#define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC /* @@ -27,9 +27,10 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); */ JEMALLOC_ALWAYS_INLINE const unsigned char * compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { - const unsigned char *ptr = (const unsigned char *) _ptr; - const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? - &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; + const unsigned char *ptr = (const unsigned char *)_ptr; + const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize + ? &ptr[usize + REDZONE_SIZE] + : &ptr[bumped_usize]; const unsigned char *page_end = (const unsigned char *) ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page); return redzone_end < page_end ? 
redzone_end : page_end; @@ -38,8 +39,8 @@ compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { assert(usize <= bumped_usize); - const unsigned char *redzone_end = - compute_redzone_end(ptr, usize, bumped_usize); + const unsigned char *redzone_end = compute_redzone_end( + ptr, usize, bumped_usize); for (unsigned char *curr = &((unsigned char *)ptr)[usize]; curr < redzone_end; curr++) { *curr = REDZONE_FILL_VALUE; @@ -47,11 +48,11 @@ safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { } JEMALLOC_ALWAYS_INLINE void -safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) -{ - const unsigned char *redzone_end = - compute_redzone_end(ptr, usize, bumped_usize); - for (const unsigned char *curr= &((const unsigned char *)ptr)[usize]; +safety_check_verify_redzone( + const void *ptr, size_t usize, size_t bumped_usize) { + const unsigned char *redzone_end = compute_redzone_end( + ptr, usize, bumped_usize); + for (const unsigned char *curr = &((const unsigned char *)ptr)[usize]; curr < redzone_end; curr++) { if (unlikely(*curr != REDZONE_FILL_VALUE)) { safety_check_fail("Use after free error\n"); diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 669f99dd..5dcae376 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -32,22 +32,22 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * Unguard the extent, but don't modify emap boundaries. Must be called on an * extent that has been erased from emap and shouldn't be placed back. 
*/ -void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, - edata_t *edata, emap_t *emap); +void san_unguard_pages_pre_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize); void tsd_san_init(tsd_t *tsd); void san_init(ssize_t lg_san_uaf_align); static inline void -san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool remap) { +san_guard_pages_two_sided( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool remap) { san_guard_pages(tsdn, ehooks, edata, emap, true, true, remap); } static inline void -san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { +san_unguard_pages_two_sided( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { san_unguard_pages(tsdn, ehooks, edata, emap, true, true); } @@ -83,14 +83,14 @@ san_guard_enabled(void) { } static inline bool -san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, - size_t alignment) { - if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { +san_large_extent_decide_guard( + tsdn_t *tsdn, ehooks_t *ehooks, size_t size, size_t alignment) { + if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) + || tsdn_null(tsdn)) { return false; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint64_t n = tsd_san_extents_until_guard_large_get(tsd); assert(n >= 1); if (n > 1) { @@ -101,10 +101,10 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; } - if (n == 1 && (alignment <= PAGE) && - (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { - *tsd_san_extents_until_guard_largep_get(tsd) = - opt_san_guard_large; + if (n == 1 && (alignment <= PAGE) + && (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { + *tsd_san_extents_until_guard_largep_get( + tsd) = 
opt_san_guard_large; return true; } else { assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); @@ -114,17 +114,17 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, static inline bool san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { - if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { + if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) + || tsdn_null(tsdn)) { return false; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint64_t n = tsd_san_extents_until_guard_small_get(tsd); assert(n >= 1); if (n == 1) { - *tsd_san_extents_until_guard_smallp_get(tsd) = - opt_san_guard_small; + *tsd_san_extents_until_guard_smallp_get( + tsd) = opt_san_guard_small; return true; } else { *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; @@ -134,8 +134,8 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { } static inline void -san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, - void **last) { +san_junk_ptr_locations( + void *ptr, size_t usize, void **first, void **mid, void **last) { size_t ptr_sz = sizeof(void *); *first = ptr; @@ -184,8 +184,8 @@ static inline bool san_uaf_detection_enabled(void) { bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1); if (config_uaf_detection && ret) { - assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 << - opt_lg_san_uaf_align) - 1); + assert(san_cache_bin_nonfast_mask + == ((uintptr_t)1 << opt_lg_san_uaf_align) - 1); } return ret; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index d6e9cfc5..9e42b69b 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -12,7 +12,7 @@ extern bool opt_retain; typedef struct ehooks_s ehooks_t; -typedef struct pac_s pac_t; +typedef struct pac_s pac_t; typedef struct san_bump_alloc_s san_bump_alloc_t; struct san_bump_alloc_s { @@ -36,7 +36,7 @@ san_bump_enabled(void) { } 
static inline bool -san_bump_alloc_init(san_bump_alloc_t* sba) { +san_bump_alloc_init(san_bump_alloc_t *sba) { bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); if (err) { @@ -47,8 +47,7 @@ san_bump_alloc_init(san_bump_alloc_t* sba) { return false; } -edata_t * -san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, - size_t size, bool zero); +edata_t *san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size, bool zero); #endif /* JEMALLOC_INTERNAL_SAN_BUMP_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 97956e7a..17a8278a 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -174,7 +174,7 @@ #if SC_LG_TINY_MIN == 0 /* The div module doesn't support division by 1, which this would require. */ -#error "Unsupported LG_TINY_MIN" +# error "Unsupported LG_TINY_MIN" #endif /* @@ -194,8 +194,8 @@ * We could probably save some space in arenas by capping this at LG_VADDR size. */ #define SC_LG_BASE_MAX (SC_PTR_BITS - 2) -#define SC_NREGULAR (SC_NGROUP * \ - (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) +#define SC_NREGULAR \ + (SC_NGROUP * (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) /* @@ -222,29 +222,29 @@ * * This gives us the quantity we seek. */ -#define SC_NPSIZES ( \ - SC_NGROUP \ - + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ - + SC_NGROUP - 1) +#define SC_NPSIZES \ + (SC_NGROUP + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ + + SC_NGROUP - 1) /* * We declare a size class is binnable if size < page size * group. Or, in other * words, lg(size) < lg(page size) + lg(group size). */ -#define SC_NBINS ( \ - /* Sub-regular size classes. 
*/ \ - SC_NTINY + SC_NPSEUDO \ - /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ - + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \ - /* Last SC of the last group hits the bound exactly; exclude it. */ \ - - 1) +#define SC_NBINS \ + (/* Sub-regular size classes. */ \ + SC_NTINY \ + + SC_NPSEUDO /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ + + SC_NGROUP \ + * (LG_PAGE + SC_LG_NGROUP \ + - SC_LG_FIRST_REGULAR_BASE) /* Last SC of the last group hits the bound exactly; exclude it. */ \ + - 1) /* * The size2index_tab lookup table uses uint8_t to encode each bin index, so we * cannot support more than 256 small size classes. */ #if (SC_NBINS > 256) -# error "Too many small size classes" +# error "Too many small size classes" #endif /* The largest size class in the lookup table, and its binary log. */ @@ -256,12 +256,12 @@ #define SC_SMALL_MAX_DELTA (1 << (LG_PAGE - 1)) /* The largest size class allocated out of a slab. */ -#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ - + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) +#define SC_SMALL_MAXCLASS \ + (SC_SMALL_MAX_BASE + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) /* The fastpath assumes all lookup-able sizes are small. */ #if (SC_SMALL_MAXCLASS < SC_LOOKUP_MAXCLASS) -# error "Lookup table sizes must be small" +# error "Lookup table sizes must be small" #endif /* The smallest size class not allocated out of a slab. */ @@ -277,13 +277,13 @@ /* Maximum number of regions in one slab. 
*/ #ifndef CONFIG_LG_SLAB_MAXREGS -# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) #else -# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) -# error "Unsupported SC_LG_SLAB_MAXREGS" -# else -# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS -# endif +# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) +# error "Unsupported SC_LG_SLAB_MAXREGS" +# else +# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS +# endif #endif /* @@ -364,13 +364,13 @@ struct sc_data_s { }; size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); -void sc_data_init(sc_data_t *data); +void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. * Otherwise, does its best to accommodate the request. */ -void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, - int pgs); +void sc_data_update_slab_size( + sc_data_t *data, size_t begin, size_t end, int pgs); void sc_boot(sc_data_t *data); #endif /* JEMALLOC_INTERNAL_SC_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 8ef1e9fb..50daf066 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -59,7 +59,7 @@ struct sec_bin_s { * stats; rather, it allows us to quickly determine the change in the * centralized counter when flushing. */ - size_t bytes_cur; + size_t bytes_cur; edata_list_active_t freelist; }; @@ -80,7 +80,7 @@ struct sec_shard_s { * that we won't go down these pathways very often after custom extent * hooks are installed. */ - bool enabled; + bool enabled; sec_bin_t *bins; /* Number of bytes in all bins in the shard. 
*/ size_t bytes_cur; @@ -90,12 +90,12 @@ struct sec_shard_s { typedef struct sec_s sec_t; struct sec_s { - pai_t pai; + pai_t pai; pai_t *fallback; - sec_opts_t opts; + sec_opts_t opts; sec_shard_t *shards; - pszind_t npsizes; + pszind_t npsizes; }; bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, @@ -110,8 +110,8 @@ void sec_disable(tsdn_t *tsdn, sec_t *sec); * split), which simplifies the stats management. */ void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats); -void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, - mutex_prof_data_t *mutex_prof_data); +void sec_mutex_stats_read( + tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data); /* * We use the arena lock ordering; these are acquired in phase 2 of forking, but diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index 19ed1492..e0699d7a 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -44,18 +44,14 @@ struct sec_opts_s { size_t batch_fill_extra; }; -#define SEC_OPTS_DEFAULT { \ - /* nshards */ \ - 4, \ - /* max_alloc */ \ - (32 * 1024) < PAGE ? PAGE : (32 * 1024), \ - /* max_bytes */ \ - 256 * 1024, \ - /* bytes_after_flush */ \ - 128 * 1024, \ - /* batch_fill_extra */ \ - 0 \ -} - +#define SEC_OPTS_DEFAULT \ + { \ + /* nshards */ \ + 4, /* max_alloc */ \ + (32 * 1024) < PAGE ? 
PAGE : (32 * 1024), /* max_bytes */ \ + 256 * 1024, /* bytes_after_flush */ \ + 128 * 1024, /* batch_fill_extra */ \ + 0 \ + } #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index 2e14430f..135c4905 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -23,210 +23,210 @@ * smootheststep(x) = -20x + 70x - 84x + 35x */ -#define SMOOTHSTEP_VARIANT "smoother" -#define SMOOTHSTEP_NSTEPS 200 -#define SMOOTHSTEP_BFP 24 -#define SMOOTHSTEP \ - /* STEP(step, h, x, y) */ \ - STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ - STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ - STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ - STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ - STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ - STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ - STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ - STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ - STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ - STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ - STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ - STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ - STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ - STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ - STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ - STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ - STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ - STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ - STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ - STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ - 
STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ - STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ - STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ - STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ - STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ - STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ - STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ - STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ - STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ - STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ - STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ - STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ - STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ - STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ - STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ - STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ - STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ - STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ - STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ - STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ - STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ - STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ - STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ - STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ - STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ - STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ - STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ - STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ - STEP( 49, UINT64_C(0x0000000000192b04), 
0.245, 0.098312651543750) \ - STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ - STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ - STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ - STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ - STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ - STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ - STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ - STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ - STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ - STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ - STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ - STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ - STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ - STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ - STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ - STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ - STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ - STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ - STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ - STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ - STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ - STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ - STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ - STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ - STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ - STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ - STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ - STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ - STEP( 78, 
UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ - STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ - STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ - STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ - STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ - STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ - STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ - STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ - STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ - STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ - STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ - STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ - STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ - STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ - STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ - STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ - STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ - STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ - STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ - STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ - STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ - STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ - STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ - STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ - STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ - STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ - STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ - STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ - STEP( 106, UINT64_C(0x00000000008e5d8f), 
0.530, 0.556115145800000) \ - STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ - STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ - STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ - STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ - STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ - STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ - STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ - STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ - STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ - STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ - STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ - STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ - STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ - STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ - STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ - STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ - STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ - STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ - STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ - STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ - STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ - STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ - STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ - STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ - STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ - STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ - STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ - STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 
0.795036914200000) \ - STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ - STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ - STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ - STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ - STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ - STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ - STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ - STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ - STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ - STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ - STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ - STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ - STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ - STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ - STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ - STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ - STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ - STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ - STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ - STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ - STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ - STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ - STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ - STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ - STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ - STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ - STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ - STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 
0.949472490600000) \ - STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ - STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ - STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ - STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ - STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ - STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ - STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ - STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ - STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ - STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ - STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ - STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ - STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ - STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ - STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ - STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ - STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ - STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ - STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ - STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ - STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ - STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ - STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ - STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ - STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ - STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ - STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ - STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 
0.998841875000000) \ - STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ - STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ - STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ - STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ - STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ - STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ - STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ - STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ - STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ - STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP(1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP(2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP(3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP(4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP(5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP(6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP(7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP(8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP(9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP(10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP(11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP(12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP(13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP(14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP(15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP(16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP(17, 
UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP(18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP(19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP(20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP(21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP(22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP(23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP(24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP(25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP(26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP(27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP(28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP(29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP(30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP(31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP(32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP(33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP(34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP(35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP(36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP(37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP(38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP(39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP(40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP(41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP(42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP(43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP(44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP(45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + 
STEP(46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP(47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP(48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP(49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP(50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP(51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP(52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP(53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP(54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP(55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP(56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP(57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP(58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP(59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP(60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP(61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP(62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP(63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP(64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP(65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP(66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP(67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP(68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP(69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP(70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP(71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP(72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP(73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP(74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ 
+ STEP(75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP(76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP(77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP(78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP(79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP(80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP(81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP(82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP(83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP(84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP(85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP(86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP(87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP(88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP(89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP(90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP(91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP(92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP(93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP(94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP(95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP(96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP(97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP(98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP(99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP(100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP(101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP(102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP(103, UINT64_C(0x0000000000873218), 0.515, 
0.528108129556250) \ + STEP(104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP(105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP(106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP(107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP(108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP(109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP(110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP(111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP(112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP(113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP(114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP(115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP(116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP(117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP(118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP(119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP(120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP(121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP(122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP(123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP(124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP(125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP(126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP(127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP(128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP(129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP(130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP(131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP(132, 
UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP(133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP(134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP(135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP(136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP(137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP(138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP(139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP(140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP(141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP(142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP(143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP(144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP(145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP(146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP(147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP(148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP(149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP(150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP(151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP(152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP(153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP(154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP(155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP(156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP(157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP(158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP(159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP(160, UINT64_C(0x0000000000f12c27), 0.800, 
0.942080000000000) \ + STEP(161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP(162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP(163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP(164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP(165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP(166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP(167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP(168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP(169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP(170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP(171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP(172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP(173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP(174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP(175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP(176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP(177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP(178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP(179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP(180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP(181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP(182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP(183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP(184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP(185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP(186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP(187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP(188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP(189, 
UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP(190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP(191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP(192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP(193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP(194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP(195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP(196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP(197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP(198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP(199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP(200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 87c400d5..4cd5e1db 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -3,7 +3,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#define SPIN_INITIALIZER {0U} +#define SPIN_INITIALIZER \ + { 0U } typedef struct { unsigned iteration; @@ -11,12 +12,12 @@ typedef struct { static inline void spin_cpu_spinwait(void) { -# if HAVE_CPU_SPINWAIT +#if HAVE_CPU_SPINWAIT CPU_SPINWAIT; -# else +#else volatile int x = 0; x = x; -# endif +#endif } static inline void diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index a5f1be32..1c7b23e0 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -7,32 +7,32 @@ #include "jemalloc/internal/tsd_types.h" /* OPTION(opt, var_name, default, set_value_to) */ -#define STATS_PRINT_OPTIONS \ - OPTION('J', json, false, true) \ - OPTION('g', general, true, false) \ - OPTION('m', merged, config_stats, false) \ - OPTION('d', destroyed, config_stats, false) \ - OPTION('a', unmerged, config_stats, 
false) \ - OPTION('b', bins, true, false) \ - OPTION('l', large, true, false) \ - OPTION('x', mutex, true, false) \ - OPTION('e', extents, true, false) \ - OPTION('h', hpa, config_stats, false) +#define STATS_PRINT_OPTIONS \ + OPTION('J', json, false, true) \ + OPTION('g', general, true, false) \ + OPTION('m', merged, config_stats, false) \ + OPTION('d', destroyed, config_stats, false) \ + OPTION('a', unmerged, config_stats, false) \ + OPTION('b', bins, true, false) \ + OPTION('l', large, true, false) \ + OPTION('x', mutex, true, false) \ + OPTION('e', extents, true, false) \ + OPTION('h', hpa, config_stats, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, - STATS_PRINT_OPTIONS + STATS_PRINT_OPTIONS #undef OPTION - stats_print_tot_num_options + stats_print_tot_num_options }; /* Options for stats_print. */ extern bool opt_stats_print; -extern char opt_stats_print_opts[stats_print_tot_num_options+1]; +extern char opt_stats_print_opts[stats_print_tot_num_options + 1]; /* Utilities for stats_interval. */ extern int64_t opt_stats_interval; -extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; +extern char opt_stats_interval_opts[stats_print_tot_num_options + 1]; #define STATS_INTERVAL_DEFAULT -1 /* diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3a32e232..d75a3034 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -76,8 +76,9 @@ sz_psz2ind(size_t psz) { * SC_NGROUP. off_to_first_ps_rg begins from 1, instead of 0. e.g. * off_to_first_ps_rg is 1 when psz is (PAGE * SC_NGROUP + 1). */ - pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) ? - 0 : x - (SC_LG_NGROUP + LG_PAGE); + pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) + ? 0 + : x - (SC_LG_NGROUP + LG_PAGE); /* * Same as sc_s::lg_delta. @@ -85,8 +86,9 @@ sz_psz2ind(size_t psz) { * for each increase in offset, it's multiplied by two. * Therefore, lg_delta = LG_PAGE + (off_to_first_ps_rg - 1). 
*/ - pszind_t lg_delta = (off_to_first_ps_rg == 0) ? - LG_PAGE : LG_PAGE + (off_to_first_ps_rg - 1); + pszind_t lg_delta = (off_to_first_ps_rg == 0) + ? LG_PAGE + : LG_PAGE + (off_to_first_ps_rg - 1); /* * Let's write psz in binary, e.g. 0011 for 0x3, 0111 for 0x7. @@ -118,13 +120,13 @@ sz_pind2sz_compute(pszind_t pind) { size_t grp = pind >> SC_LG_NGROUP; size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp) + size_t grp_size_mask = ~((!!grp) - 1); + size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP - 1))) << grp) & grp_size_mask; size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; + size_t lg_delta = shift + (LG_PAGE - 1); + size_t mod_size = (mod + 1) << lg_delta; size_t sz = grp_size + mod_size; return sz; @@ -148,9 +150,10 @@ sz_psz2u(size_t psz) { if (unlikely(psz > SC_LARGE_MAXCLASS)) { return SC_LARGE_MAXCLASS + PAGE; } - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? - LG_PAGE : x - SC_LG_NGROUP - 1; + size_t x = lg_floor((psz << 1) - 1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) + ? LG_PAGE + : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (psz + delta_mask) & ~delta_mask; @@ -174,17 +177,19 @@ sz_size2index_compute_inline(size_t size) { } #endif { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 : - x - (SC_LG_NGROUP + LG_QUANTUM); + szind_t x = lg_floor((size << 1) - 1); + szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) + ? 0 + : x - (SC_LG_NGROUP + LG_QUANTUM); szind_t grp = shift << SC_LG_NGROUP; szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; + ? 
LG_QUANTUM + : x - SC_LG_NGROUP - 1; - size_t delta_inverse_mask = ZU(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << SC_LG_NGROUP) - 1); + size_t delta_inverse_mask = ZU(-1) << lg_delta; + szind_t mod = ((((size - 1) & delta_inverse_mask) >> lg_delta)) + & ((ZU(1) << SC_LG_NGROUP) - 1); szind_t index = SC_NTINY + grp + mod; return index; @@ -228,16 +233,16 @@ sz_index2size_compute_inline(szind_t index) { { size_t reduced_index = index - SC_NTINY; size_t grp = reduced_index >> SC_LG_NGROUP; - size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - - 1); + size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (SC_LG_NGROUP-1))) << grp) & grp_size_mask; + size_t grp_size_mask = ~((!!grp) - 1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + (SC_LG_NGROUP - 1))) + << grp) + & grp_size_mask; size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; + size_t lg_delta = shift + (LG_QUANTUM - 1); + size_t mod_size = (mod + 1) << lg_delta; size_t usize = grp_size + mod_size; return usize; @@ -269,8 +274,8 @@ sz_index2size_unsafe(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(!sz_large_size_classes_disabled() || - index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); + assert(!sz_large_size_classes_disabled() + || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); size_t size = sz_index2size_unsafe(index); /* * With large size classes disabled, the usize above @@ -285,8 +290,8 @@ sz_index2size(szind_t index) { * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here * instead of SC_LARGE_MINCLASS. 
*/ - assert(!sz_large_size_classes_disabled() || - size <= USIZE_GROW_SLOW_THRESHOLD); + assert(!sz_large_size_classes_disabled() + || size <= USIZE_GROW_SLOW_THRESHOLD); return size; } @@ -309,9 +314,10 @@ sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute_using_delta(size_t size) { - size_t x = lg_floor((size<<1)-1); + size_t x = lg_floor((size << 1) - 1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; + ? LG_QUANTUM + : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; @@ -331,8 +337,8 @@ sz_s2u_compute(size_t size) { if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) + : (ZU(1) << lg_ceil)); } #endif if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) { diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 76d601c3..73126db7 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -8,15 +8,15 @@ #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/thread_event_registry.h" -extern bool opt_tcache; -extern size_t opt_tcache_max; -extern ssize_t opt_lg_tcache_nslots_mul; +extern bool opt_tcache; +extern size_t opt_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; -extern ssize_t opt_lg_tcache_shift; -extern size_t opt_tcache_gc_incr_bytes; -extern size_t opt_tcache_gc_delay_bytes; +extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; +extern size_t 
opt_tcache_gc_delay_bytes; extern unsigned opt_lg_tcache_flush_small_div; extern unsigned opt_lg_tcache_flush_large_div; @@ -27,14 +27,14 @@ extern unsigned opt_lg_tcache_flush_large_div; * it should not be changed on the fly. To change the number of tcache bins * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_tcache_nbins; +extern unsigned global_do_not_change_tcache_nbins; /* * Maximum cached size class. Same as above, this is only used during threads * initialization and should not be changed. To change the maximum cached size * class, refer to tcache_max of each tcache. */ -extern size_t global_do_not_change_tcache_maxclass; +extern size_t global_do_not_change_tcache_maxclass; /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and @@ -44,11 +44,11 @@ extern size_t global_do_not_change_tcache_maxclass; * touched. This allows the entire array to be allocated the first time an * explicit tcache is created without a disproportionate impact on memory usage. 
*/ -extern tcaches_t *tcaches; +extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); @@ -56,23 +56,23 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_default_init(const char *bin_settings_segment_cur, - size_t len_left); +bool tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); -bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, - cache_bin_sz_t *ncached_max); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache, arena_t *arena); +bool tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); +void tcache_arena_reassociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache, arena_t *arena); +void 
thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_arena_associate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index e8e3b41f..6bd1b339 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -42,8 +42,8 @@ tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) { } static inline void -tcache_bin_settings_backup(tcache_t *tcache, - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { +tcache_bin_settings_backup( + tcache_t *tcache, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], cache_bin_ncached_max_get_unsafe(&tcache->bins[i])); @@ -51,8 +51,7 @@ tcache_bin_settings_backup(tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE bool -tcache_bin_disabled(szind_t ind, cache_bin_t *bin, - tcache_slow_t *tcache_slow) { +tcache_bin_disabled(szind_t ind, cache_bin_t *bin, tcache_slow_t *tcache_slow) { assert(bin != NULL); assert(ind < TCACHE_NBINS_MAX); bool disabled = cache_bin_disabled(bin); @@ -66,7 +65,7 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, * ind >= nbins or ncached_max == 0. If a bin is enabled, it has * ind < nbins and ncached_max > 0. 
*/ - unsigned nbins = tcache_nbins_get(tcache_slow); + unsigned nbins = tcache_nbins_get(tcache_slow); cache_bin_sz_t ncached_max = cache_bin_ncached_max_get_unsafe(bin); if (ind >= nbins) { assert(disabled); @@ -88,10 +87,10 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t binind, bool zero, bool slow_path) { +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) { void *ret; - bool tcache_success; + bool tcache_success; assert(binind < SC_NBINS); cache_bin_t *bin = &tcache->bins[binind]; @@ -103,8 +102,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } - if (unlikely(tcache_bin_disabled(binind, bin, - tcache->tcache_slow))) { + if (unlikely(tcache_bin_disabled( + binind, bin, tcache->tcache_slow))) { /* stats and zero are handled directly by the arena. 
*/ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero, /* slab */ true); @@ -112,8 +111,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache_bin_flush_stashed(tsd, tcache, bin, binind, /* is_small */ true); - ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - bin, binind, &tcache_hard_success); + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, + binind, &tcache_hard_success); if (tcache_hard_success == false) { return NULL; } @@ -135,11 +134,11 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - bool tcache_success; + bool tcache_success; cache_bin_t *bin = &tcache->bins[binind]; - assert(binind >= SC_NBINS && - !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); + assert(binind >= SC_NBINS + && !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -174,8 +173,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) { +tcache_dalloc_small( + tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; @@ -195,13 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - if (unlikely(tcache_bin_disabled(binind, bin, - tcache->tcache_slow))) { + if (unlikely(tcache_bin_disabled( + binind, bin, tcache->tcache_slow))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } cache_bin_sz_t max = cache_bin_ncached_max_get(bin); - unsigned remain = max >> opt_lg_tcache_flush_small_div; + unsigned remain = max >> 
opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -209,19 +208,18 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) { - +tcache_dalloc_large( + tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= - tcache_max_get(tcache->tcache_slow)); - assert(!tcache_bin_disabled(binind, &tcache->bins[binind], - tcache->tcache_slow)); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + <= tcache_max_get(tcache->tcache_slow)); + assert(!tcache_bin_disabled( + binind, &tcache->bins[binind], tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(bin) >> - opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_ncached_max_get(bin) + >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index e9a68152..2c000de3 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -30,45 +30,45 @@ struct tcache_slow_s { cache_bin_array_descriptor_t cache_bin_array_descriptor; /* The arena this tcache is associated with. */ - arena_t *arena; + arena_t *arena; /* The number of bins activated in the tcache. */ - unsigned tcache_nbins; + unsigned tcache_nbins; /* Last time GC has been performed. */ - nstime_t last_gc_time; + nstime_t last_gc_time; /* Next bin to GC. 
*/ - szind_t next_gc_bin; - szind_t next_gc_bin_small; - szind_t next_gc_bin_large; + szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; /* For small bins, help determine how many items to fill at a time. */ - cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; /* For small bins, whether has been refilled since last GC. */ - bool bin_refilled[SC_NBINS]; + bool bin_refilled[SC_NBINS]; /* * For small bins, the number of items we can pretend to flush before * actually flushing. */ - uint8_t bin_flush_delay_items[SC_NBINS]; + uint8_t bin_flush_delay_items[SC_NBINS]; /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this * tcache_slow_t and its associated tcache_t. */ - void *dyn_alloc; + void *dyn_alloc; /* The associated bins. */ - tcache_t *tcache; + tcache_t *tcache; }; struct tcache_s { - tcache_slow_t *tcache_slow; - cache_bin_t bins[TCACHE_NBINS_MAX]; + tcache_slow_t *tcache_slow; + cache_bin_t bins[TCACHE_NBINS_MAX]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ struct tcaches_s { union { - tcache_t *tcache; - tcaches_t *next; + tcache_t *tcache; + tcaches_t *next; }; }; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index b3828ecf..27d80d3c 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -5,12 +5,16 @@ #include "jemalloc/internal/sc.h" typedef struct tcache_slow_s tcache_slow_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ -#define TCACHE_ZERO_INITIALIZER {0} -#define TCACHE_SLOW_ZERO_INITIALIZER {{0}} +#define TCACHE_ZERO_INITIALIZER \ + { 0 } +#define TCACHE_SLOW_ZERO_INITIALIZER \ + { \ + { 0 } \ + } /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false @@ -21,9 +25,11 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) -#define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ - (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) -#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_NBINS_MAX \ + (SC_NBINS \ + + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ + + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ #define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ #define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) #define TCACHE_GC_LARGE_NBINS_MAX 1 diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index af3f2755..35f3a211 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -7,20 +7,22 @@ extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); #if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) -# define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) +# define JEMALLOC_TEST_HOOK(fn, hook) \ + ((void)(hook != NULL && (hook(), 0)), fn) -# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) -# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) -# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) -# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) -# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) -# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) -# 
define secure_getenv JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) +# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) +# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) +# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) +# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) +# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) +# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) +# define secure_getenv \ + JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) /* Note that this is undef'd and re-define'd in src/prof.c. */ -# define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# define _Unwind_Backtrace \ + JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #else -# define JEMALLOC_TEST_HOOK(fn, hook) fn +# define JEMALLOC_TEST_HOOK(fn, hook) fn #endif - #endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index bf9ca3cc..e9e2b6cd 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -37,7 +37,7 @@ #define TE_INVALID_ELAPSED UINT64_MAX typedef struct te_ctx_s { - bool is_alloc; + bool is_alloc; uint64_t *current; uint64_t *last_event; uint64_t *next_event; @@ -48,22 +48,20 @@ void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); -void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait); +void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait); /* List of all thread event counters. 
*/ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ - C(thread_allocated_last_event) \ - C(prof_sample_last_event) \ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_last_event) \ + C(prof_sample_last_event) \ C(stats_interval_last_event) /* Getters directly wrap TSD getters. */ -#define C(counter) \ -JEMALLOC_ALWAYS_INLINE uint64_t \ -counter##_get(tsd_t *tsd) { \ - return tsd_##counter##_get(tsd); \ -} +#define C(counter) \ + JEMALLOC_ALWAYS_INLINE uint64_t counter##_get(tsd_t *tsd) { \ + return tsd_##counter##_get(tsd); \ + } ITERATE_OVER_ALL_COUNTERS #undef C @@ -75,11 +73,10 @@ ITERATE_OVER_ALL_COUNTERS * temporarily delay the event and let it be immediately triggered at the next * allocation call. */ -#define C(counter) \ -JEMALLOC_ALWAYS_INLINE void \ -counter##_set(tsd_t *tsd, uint64_t v) { \ - *tsd_##counter##p_get(tsd) = v; \ -} +#define C(counter) \ + JEMALLOC_ALWAYS_INLINE void counter##_set(tsd_t *tsd, uint64_t v) { \ + *tsd_##counter##p_get(tsd) = v; \ + } ITERATE_OVER_ALL_COUNTERS #undef C diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index 1957e727..7ded440d 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -87,8 +87,8 @@ typedef void (*user_event_cb_t)( typedef struct user_hook_object_s user_hook_object_t; struct user_hook_object_s { user_event_cb_t callback; - uint64_t interval; - bool is_alloc_only; + uint64_t interval; + bool is_alloc_only; }; /* diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index dca9bd10..a1eec628 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -53,7 +53,7 @@ ticker_read(const ticker_t *ticker) { * worth the hassle, but this is on the fast path of both malloc and free (via * tcache_event). 
*/ -#if defined(__GNUC__) && !defined(__clang__) \ +#if defined(__GNUC__) && !defined(__clang__) \ && (defined(__x86_64__) || defined(__i386__)) JEMALLOC_NOINLINE #endif @@ -129,7 +129,8 @@ struct ticker_geom_s { * the behavior over long periods of time rather than the exact timing of the * initial ticks. */ -#define TICKER_GEOM_INIT(nticks) {nticks, nticks} +#define TICKER_GEOM_INIT(nticks) \ + { nticks, nticks } static inline void ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) { @@ -150,22 +151,21 @@ ticker_geom_read(const ticker_geom_t *ticker) { } /* Same deal as above. */ -#if defined(__GNUC__) && !defined(__clang__) \ +#if defined(__GNUC__) && !defined(__clang__) \ && (defined(__x86_64__) || defined(__i386__)) JEMALLOC_NOINLINE #endif static bool -ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state, - bool delay_trigger) { +ticker_geom_fixup( + ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) { if (delay_trigger) { ticker->tick = 0; return false; } uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); - ticker->tick = (uint32_t)( - (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] - / (uint64_t)TICKER_GEOM_MUL); + ticker->tick = (uint32_t)((uint64_t)ticker->nticks + * (uint64_t)ticker_geom_table[idx] / (uint64_t)TICKER_GEOM_MUL); return true; } @@ -181,8 +181,8 @@ ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks, } static inline bool -ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state, - bool delay_trigger) { +ticker_geom_tick( + ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) { return ticker_geom_ticks(ticker, prng_state, 1, delay_trigger); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c06605df..84101c65 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -7,14 +7,14 @@ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +# include "jemalloc/internal/jemalloc_preamble.h" +# include "jemalloc/internal/tsd_malloc_thread_cleanup.h" #elif (defined(JEMALLOC_TLS)) -#include "jemalloc/internal/tsd_tls.h" +# include "jemalloc/internal/tsd_tls.h" #elif (defined(_WIN32)) -#include "jemalloc/internal/tsd_win.h" +# include "jemalloc/internal/tsd_win.h" #else -#include "jemalloc/internal/tsd_generic.h" +# include "jemalloc/internal/tsd_generic.h" #endif /* @@ -22,11 +22,10 @@ * foo. This omits some safety checks, and so can be used during tsd * initialization and cleanup. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get_unsafe(tsd_t *tsd) { \ - return &tsd->TSD_MANGLE(n); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE t *tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->TSD_MANGLE(n); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER @@ -59,39 +58,36 @@ TSD_DATA_SLOWER * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE nt * \ -tsdn_##n##p_get(tsdn_t *tsdn) { \ - if (tsdn_null(tsdn)) { \ - return NULL; \ - } \ - tsd_t *tsd = tsdn_tsd(tsdn); \ - return (nt *)tsd_##n##p_get(tsd); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE nt *tsdn_##n##p_get(tsdn_t *tsdn) { \ + if (tsdn_null(tsdn)) { \ + return NULL; \ + } \ + tsd_t *tsd = tsdn_tsd(tsdn); \ + return (nt *)tsd_##n##p_get(tsd); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER #undef O /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. 
*/ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) { \ - return *tsd_##n##p_get(tsd); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE t tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER #undef O /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t val) { \ - assert(tsd_state_get(tsd) != tsd_state_reincarnated && \ - tsd_state_get(tsd) != tsd_state_minimal_initialized); \ - *tsd_##n##p_get(tsd) = val; \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE void tsd_##n##_set(tsd_t *tsd, t val) { \ + assert(tsd_state_get(tsd) != tsd_state_reincarnated \ + && tsd_state_get(tsd) != tsd_state_minimal_initialized); \ + *tsd_##n##p_get(tsd) = val; \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER @@ -104,8 +100,8 @@ tsd_assert_fast(tsd_t *tsd) { * counters; it's not in general possible to ensure that they won't * change asynchronously from underneath us. */ - assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && - tsd_reentrancy_level_get(tsd) == 0); + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) + && tsd_reentrancy_level_get(tsd) == 0); } JEMALLOC_ALWAYS_INLINE bool @@ -194,8 +190,8 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { static inline bool tsd_state_nocleanup(tsd_t *tsd) { - return tsd_state_get(tsd) == tsd_state_reincarnated || - tsd_state_get(tsd) == tsd_state_minimal_initialized; + return tsd_state_get(tsd) == tsd_state_reincarnated + || tsd_state_get(tsd) == tsd_state_minimal_initialized; } /* diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index aa8042a4..e049766f 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H -#error This file should be included only once, by tsd.h. 
+# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H @@ -12,25 +12,24 @@ typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { ql_elm(tsd_init_block_t) link; pthread_t thread; - void *data; + void *data; }; /* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */ typedef struct tsd_init_head_s tsd_init_head_t; typedef struct { - bool initialized; + bool initialized; tsd_t val; } tsd_wrapper_t; -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -extern pthread_key_t tsd_tsd; +extern pthread_key_t tsd_tsd; extern tsd_init_head_t tsd_init_head; -extern tsd_wrapper_t tsd_boot_wrapper; -extern bool tsd_booted; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE void @@ -42,8 +41,8 @@ tsd_cleanup_wrapper(void *arg) { tsd_cleanup(&wrapper->val); if (wrapper->initialized) { /* Trigger another cleanup round. 
*/ - if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) - { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) + != 0) { malloc_write(": Error setting TSD\n"); if (opt_abort) { abort(); @@ -78,23 +77,23 @@ tsd_wrapper_get(bool init) { if (init && unlikely(wrapper == NULL)) { tsd_init_block_t block; - wrapper = (tsd_wrapper_t *) - tsd_init_check_recursion(&tsd_init_head, &block); + wrapper = (tsd_wrapper_t *)tsd_init_check_recursion( + &tsd_init_head, &block); if (wrapper) { return wrapper; } - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc( + sizeof(tsd_wrapper_t)); block.data = (void *)wrapper; if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); } else { wrapper->initialized = false; - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; - JEMALLOC_DIAGNOSTIC_POP + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; } tsd_wrapper_set(wrapper); @@ -105,11 +104,11 @@ tsd_wrapper_get(bool init) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { - tsd_wrapper_t *wrapper; + tsd_wrapper_t *wrapper; tsd_init_block_t block; - wrapper = (tsd_wrapper_t *) - tsd_init_check_recursion(&tsd_init_head, &block); + wrapper = (tsd_wrapper_t *)tsd_init_check_recursion( + &tsd_init_head, &block); if (wrapper) { return false; } @@ -134,10 +133,10 @@ tsd_boot1(void) { tsd_boot_wrapper.initialized = false; tsd_cleanup(&tsd_boot_wrapper.val); wrapper->initialized = false; - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; - JEMALLOC_DIAGNOSTIC_POP + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; tsd_wrapper_set(wrapper); } diff --git 
a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 69b60519..f675587d 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -48,123 +48,113 @@ #ifdef JEMALLOC_JET typedef void (*test_callback_t)(int *); -# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 -# define MALLOC_TEST_TSD \ - O(test_data, int, int) \ - O(test_callback, test_callback_t, int) -# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL #else -# define MALLOC_TEST_TSD -# define MALLOC_TEST_TSD_INITIALIZER +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER #endif typedef ql_elm(tsd_t) tsd_link_t; /* O(name, type, nullable type) */ -#define TSD_DATA_SLOW \ - O(tcache_enabled, bool, bool) \ - O(reentrancy_level, int8_t, int8_t) \ - O(min_init_state_nfetched, uint8_t, uint8_t) \ - O(thread_allocated_last_event, uint64_t, uint64_t) \ - O(thread_allocated_next_event, uint64_t, uint64_t) \ - O(thread_deallocated_last_event, uint64_t, uint64_t) \ - O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(te_data, te_data_t, te_data_t) \ - O(prof_sample_last_event, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(prng_state, uint64_t, uint64_t) \ - O(san_extents_until_guard_small, uint64_t, uint64_t) \ - O(san_extents_until_guard_large, uint64_t, uint64_t) \ - O(iarena, arena_t *, arena_t *) \ - O(arena, arena_t *, arena_t *) \ - O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ - O(sec_shard, uint8_t, uint8_t) \ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) \ - O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, 
activity_callback_thunk_t, \ - activity_callback_thunk_t) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) +#define TSD_DATA_SLOW \ + O(tcache_enabled, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ + O(te_data, te_data_t, te_data_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ + O(sec_shard, uint8_t, uint8_t) \ + O(binshards, tsd_binshards_t, tsd_binshards_t) \ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) -#define TSD_DATA_SLOW_INITIALIZER \ - /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* reentrancy_level */ 0, \ - /* min_init_state_nfetched */ 0, \ - /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ 0, \ - /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ 0, \ - /* te_data */ TE_DATA_INITIALIZER, \ - /* prof_sample_last_event */ 0, \ - /* stats_interval_last_event */ 0, \ - /* prof_tdata */ NULL, \ - /* prng_state */ 0, \ - /* san_extents_until_guard_small */ 0, \ - /* san_extents_until_guard_large */ 0, \ - /* iarena */ NULL, \ - /* arena */ NULL, \ - /* 
arena_decay_ticker */ \ - TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ - /* sec_shard */ (uint8_t)-1, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tsd_link */ {NULL}, \ - /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, \ - /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_INITIALIZER, +#define TSD_DATA_SLOW_INITIALIZER \ + /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* reentrancy_level */ 0, /* min_init_state_nfetched */ 0, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ 0, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ 0, \ + /* te_data */ TE_DATA_INITIALIZER, /* prof_sample_last_event */ 0, \ + /* stats_interval_last_event */ 0, /* prof_tdata */ NULL, \ + /* prng_state */ 0, /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, /* iarena */ NULL, \ + /* arena */ NULL, /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ + /* sec_shard */ (uint8_t) - 1, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_INITIALIZER, /* O(name, type, nullable type) */ -#define TSD_DATA_FAST \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(tcache, tcache_t, tcache_t) +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(tcache, tcache_t, 
tcache_t) -#define TSD_DATA_FAST_INITIALIZER \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, /* O(name, type, nullable type) */ -#define TSD_DATA_SLOWER \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD -#define TSD_DATA_SLOWER_INITIALIZER \ - /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER +#define TSD_DATA_SLOWER_INITIALIZER \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER - -#define TSD_INITIALIZER { \ - TSD_DATA_SLOW_INITIALIZER \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ - TSD_DATA_FAST_INITIALIZER \ - TSD_DATA_SLOWER_INITIALIZER \ -} +#define TSD_INITIALIZER \ + { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER TSD_DATA_SLOWER_INITIALIZER \ + } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) void _malloc_tsd_cleanup_register(bool (*f)(void)); #endif -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); -void tsd_state_set(tsd_t *tsd, uint8_t new_state); -void tsd_slow_update(tsd_t *tsd); -void tsd_prefork(tsd_t *tsd); -void tsd_postfork_parent(tsd_t *tsd); -void tsd_postfork_child(tsd_t *tsd); +void 
tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); /* * Call ..._inc when your module wants to take all threads down the slow paths, @@ -224,15 +214,15 @@ enum { #define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n #ifdef JEMALLOC_U8_ATOMICS -# define tsd_state_t atomic_u8_t -# define tsd_atomic_load atomic_load_u8 -# define tsd_atomic_store atomic_store_u8 -# define tsd_atomic_exchange atomic_exchange_u8 +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 #else -# define tsd_state_t atomic_u32_t -# define tsd_atomic_load atomic_load_u32 -# define tsd_atomic_store atomic_store_u32 -# define tsd_atomic_exchange atomic_exchange_u32 +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 #endif /* The actual tsd. */ @@ -243,8 +233,7 @@ struct tsd_s { * setters below. */ -#define O(n, t, nt) \ - t TSD_MANGLE(n); +#define O(n, t, nt) t TSD_MANGLE(n); TSD_DATA_SLOW /* diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index fb9ea1b4..00756df1 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. 
#endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 5e5a6e5e..6536eb54 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_TLS_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_TLS_H @@ -11,7 +11,7 @@ extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; extern pthread_key_t tsd_tsd; -extern bool tsd_booted; +extern bool tsd_booted; /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 73bbe486..46479506 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,11 +1,11 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#define MALLOC_TSD_CLEANUPS_MAX 4 +#define MALLOC_TSD_CLEANUPS_MAX 4 #include "jemalloc/internal/jemalloc_preamble.h" -typedef struct tsd_s tsd_t; +typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; typedef bool (*malloc_tsd_cleanup_t)(void); diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 559ee78f..8b22bec1 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_WIN_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_WIN_H @@ -13,29 +13,29 @@ than a type cast. 
*/ typedef struct { tsd_t val; - bool initialized; + bool initialized; } tsd_wrapper_t; #if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -extern DWORD tsd_tsd; +extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; -extern bool tsd_booted; -#if defined(_M_ARM64EC) -#define JEMALLOC_WIN32_TLSGETVALUE2 0 -#else -#define JEMALLOC_WIN32_TLSGETVALUE2 1 -#endif -#if JEMALLOC_WIN32_TLSGETVALUE2 -typedef LPVOID (WINAPI *TGV2)(DWORD dwTlsIndex); -extern TGV2 tls_get_value2; +extern bool tsd_booted; +# if defined(_M_ARM64EC) +# define JEMALLOC_WIN32_TLSGETVALUE2 0 +# else +# define JEMALLOC_WIN32_TLSGETVALUE2 1 +# endif +# if JEMALLOC_WIN32_TLSGETVALUE2 +typedef LPVOID(WINAPI *TGV2)(DWORD dwTlsIndex); +extern TGV2 tls_get_value2; extern HMODULE tgv2_mod; -#endif +# endif /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool tsd_cleanup_wrapper(void) { - DWORD error = GetLastError(); + DWORD error = GetLastError(); tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); SetLastError(error); @@ -66,20 +66,20 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { tsd_wrapper_t *wrapper; -#if JEMALLOC_WIN32_TLSGETVALUE2 +# if JEMALLOC_WIN32_TLSGETVALUE2 if (tls_get_value2 != NULL) { - wrapper = (tsd_wrapper_t *) tls_get_value2(tsd_tsd); + wrapper = (tsd_wrapper_t *)tls_get_value2(tsd_tsd); } else -#endif +# endif { DWORD error = GetLastError(); - wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); SetLastError(error); } if (init && unlikely(wrapper == NULL)) { - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc( + sizeof(tsd_wrapper_t)); if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); @@ -102,12 +102,12 @@ tsd_boot0(void) { } _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); -#if JEMALLOC_WIN32_TLSGETVALUE2 
+# if JEMALLOC_WIN32_TLSGETVALUE2 tgv2_mod = LoadLibraryA("api-ms-win-core-processthreads-l1-1-8.dll"); if (tgv2_mod != NULL) { tls_get_value2 = (TGV2)GetProcAddress(tgv2_mod, "TlsGetValue2"); } -#endif +# endif tsd_booted = true; return false; } @@ -115,8 +115,7 @@ tsd_boot0(void) { JEMALLOC_ALWAYS_INLINE void tsd_boot1(void) { tsd_wrapper_t *wrapper; - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); @@ -174,7 +173,7 @@ tsd_set(tsd_t *val) { #else // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -#define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type +# define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type extern JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls; extern bool tsd_booted; diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 7c4826fc..78704e48 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -6,54 +6,49 @@ * bit easier to use; it handles ql_elm_new calls and provides type safety. 
*/ -#define TYPED_LIST(list_type, el_type, linkage) \ -typedef struct { \ - ql_head(el_type) head; \ -} list_type##_t; \ -static inline void \ -list_type##_init(list_type##_t *list) { \ - ql_new(&list->head); \ -} \ -static inline el_type * \ -list_type##_first(const list_type##_t *list) { \ - return ql_first(&list->head); \ -} \ -static inline el_type * \ -list_type##_last(const list_type##_t *list) { \ - return ql_last(&list->head, linkage); \ -} \ -static inline el_type * \ -list_type##_next(const list_type##_t *list, el_type *item) { \ - return ql_next(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_append(list_type##_t *list, el_type *item) { \ - ql_elm_new(item, linkage); \ - ql_tail_insert(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_prepend(list_type##_t *list, el_type *item) { \ - ql_elm_new(item, linkage); \ - ql_head_insert(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_replace(list_type##_t *list, el_type *to_remove, \ - el_type *to_insert) { \ - ql_elm_new(to_insert, linkage); \ - ql_after_insert(to_remove, to_insert, linkage); \ - ql_remove(&list->head, to_remove, linkage); \ -} \ -static inline void \ -list_type##_remove(list_type##_t *list, el_type *item) { \ - ql_remove(&list->head, item, linkage); \ -} \ -static inline bool \ -list_type##_empty(list_type##_t *list) { \ - return ql_empty(&list->head); \ -} \ -static inline void \ -list_type##_concat(list_type##_t *list_a, list_type##_t *list_b) { \ - ql_concat(&list_a->head, &list_b->head, linkage); \ -} +#define TYPED_LIST(list_type, el_type, linkage) \ + typedef struct { \ + ql_head(el_type) head; \ + } list_type##_t; \ + static inline void list_type##_init(list_type##_t *list) { \ + ql_new(&list->head); \ + } \ + static inline el_type *list_type##_first(const list_type##_t *list) { \ + return ql_first(&list->head); \ + } \ + static inline el_type *list_type##_last(const list_type##_t *list) { \ + return ql_last(&list->head, 
linkage); \ + } \ + static inline el_type *list_type##_next( \ + const list_type##_t *list, el_type *item) { \ + return ql_next(&list->head, item, linkage); \ + } \ + static inline void list_type##_append( \ + list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_tail_insert(&list->head, item, linkage); \ + } \ + static inline void list_type##_prepend( \ + list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_head_insert(&list->head, item, linkage); \ + } \ + static inline void list_type##_replace( \ + list_type##_t *list, el_type *to_remove, el_type *to_insert) { \ + ql_elm_new(to_insert, linkage); \ + ql_after_insert(to_remove, to_insert, linkage); \ + ql_remove(&list->head, to_remove, linkage); \ + } \ + static inline void list_type##_remove( \ + list_type##_t *list, el_type *item) { \ + ql_remove(&list->head, item, linkage); \ + } \ + static inline bool list_type##_empty(list_type##_t *list) { \ + return ql_empty(&list->head); \ + } \ + static inline void list_type##_concat( \ + list_type##_t *list_a, list_type##_t *list_b) { \ + ql_concat(&list_a->head, &list_b->head, linkage); \ + } #endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 35aa26e6..bf246c95 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -8,10 +8,10 @@ /* Junk fill patterns. */ #ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) #endif #ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) #endif /* @@ -32,20 +32,20 @@ #define JEMALLOC_CC_SILENCE_INIT(...) 
= __VA_ARGS__ #ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -# define likely(x) !!(x) -# define unlikely(x) !!(x) +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L -#include +# include #else -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() +# if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +# endif +# define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() #endif /* Set error code. */ @@ -69,27 +69,27 @@ get_errno(void) { } #ifdef _MSC_VER -#define util_assume __assume -#elif defined(__clang__) && (__clang_major__ > 3 || \ - (__clang_major__ == 3 && __clang_minor__ >= 6)) -#define util_assume __builtin_assume +# define util_assume __assume +#elif defined(__clang__) \ + && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)) +# define util_assume __builtin_assume #else -#define util_assume(expr) \ - do { \ - if (!(expr)) { \ - unreachable(); \ - } \ - } while(0) +# define util_assume(expr) \ + do { \ + if (!(expr)) { \ + unreachable(); \ + } \ + } while (0) #endif /* Allows compiler constant folding on inlined paths. */ #if defined(__has_builtin) -# if __has_builtin(__builtin_constant_p) -# define util_compile_time_const(x) __builtin_constant_p(x) -# endif +# if __has_builtin(__builtin_constant_p) +# define util_compile_time_const(x) __builtin_constant_p(x) +# endif #endif #ifndef util_compile_time_const -# define util_compile_time_const(x) (false) +# define util_compile_time_const(x) (false) #endif /* ptr should be valid. 
*/ @@ -148,7 +148,6 @@ util_prefetch_write_range(void *ptr, size_t sz) { * key1-key2:value|key3-key4:value|... * Note it does not handle the ending '\0'. */ -bool -multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, - size_t *key_start, size_t *key_end, size_t *value); +bool multi_setting_parse_next(const char **setting_segment_cur, + size_t *len_left, size_t *key_start, size_t *key_end, size_t *value); #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index acf7860d..73770713 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -66,8 +66,8 @@ enum witness_rank_e { WITNESS_RANK_HOOK, WITNESS_RANK_BIN, - WITNESS_RANK_LEAF=0x1000, - WITNESS_RANK_BATCHER=WITNESS_RANK_LEAF, + WITNESS_RANK_LEAF = 0x1000, + WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, @@ -86,38 +86,43 @@ typedef enum witness_rank_e witness_rank_t; /* PER-WITNESS DATA */ /******************************************************************************/ #if defined(JEMALLOC_DEBUG) -# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +# define WITNESS_INITIALIZER(name, rank) \ + { \ + name, rank, NULL, NULL, { \ + NULL, NULL \ + } \ + } #else -# define WITNESS_INITIALIZER(name, rank) +# define WITNESS_INITIALIZER(name, rank) #endif typedef struct witness_s witness_t; typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, void *, const witness_t *, - void *); +typedef int witness_comp_t( + const witness_t *, void *, const witness_t *, void *); struct witness_s { /* Name, used for printing lock order reversal messages. */ - const char *name; + const char *name; /* * Witness rank, where 0 is lowest and WITNESS_RANK_LEAF is highest. 
* Witnesses must be acquired in order of increasing rank. */ - witness_rank_t rank; + witness_rank_t rank; /* * If two witnesses are of equal rank and they have the samp comp * function pointer, it is called as a last attempt to differentiate * between witnesses of equal rank. */ - witness_comp_t *comp; + witness_comp_t *comp; /* Opaque data, passed to comp(). */ - void *opaque; + void *opaque; /* Linkage for thread's currently owned locks. */ - ql_elm(witness_t) link; + ql_elm(witness_t) link; }; /******************************************************************************/ @@ -126,10 +131,11 @@ struct witness_s { typedef struct witness_tsd_s witness_tsd_t; struct witness_tsd_s { witness_list_t witnesses; - bool forking; + bool forking; }; -#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false } +#define WITNESS_TSD_INITIALIZER \ + { ql_head_initializer(witnesses), false } #define WITNESS_TSDN_NULL ((witness_tsdn_t *)0) /******************************************************************************/ @@ -162,17 +168,17 @@ witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) { void witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness_comp_t *comp, void *opaque); -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +typedef void(witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *JET_MUTABLE witness_lock_error; -typedef void (witness_owner_error_t)(const witness_t *); +typedef void(witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *JET_MUTABLE witness_owner_error; -typedef void (witness_not_owner_error_t)(const witness_t *); +typedef void(witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; -typedef void (witness_depth_error_t)(const witness_list_t *, - witness_rank_t rank_inclusive, unsigned depth); +typedef void(witness_depth_error_t)( + const witness_list_t *, 
witness_rank_t rank_inclusive, unsigned depth); extern witness_depth_error_t *JET_MUTABLE witness_depth_error; void witnesses_cleanup(witness_tsd_t *witness_tsd); @@ -184,12 +190,12 @@ void witness_postfork_child(witness_tsd_t *witness_tsd); static inline bool witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) { witness_list_t *witnesses; - witness_t *w; + witness_t *w; cassert(config_debug); witnesses = &witness_tsd->witnesses; - ql_foreach(w, witnesses, link) { + ql_foreach (w, witnesses, link) { if (w == witness) { return true; } @@ -221,11 +227,11 @@ witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) { } static inline void -witness_assert_not_owner(witness_tsdn_t *witness_tsdn, - const witness_t *witness) { - witness_tsd_t *witness_tsd; +witness_assert_not_owner( + witness_tsdn_t *witness_tsdn, const witness_t *witness) { + witness_tsd_t *witness_tsd; witness_list_t *witnesses; - witness_t *w; + witness_t *w; if (!config_debug) { return; @@ -240,7 +246,7 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, } witnesses = &witness_tsd->witnesses; - ql_foreach(w, witnesses, link) { + ql_foreach (w, witnesses, link) { if (w == witness) { witness_not_owner_error(witness); } @@ -249,9 +255,9 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, /* Returns depth. Not intended for direct use. 
*/ static inline unsigned -witness_depth_to_rank(witness_list_t *witnesses, witness_rank_t rank_inclusive) -{ - unsigned d = 0; +witness_depth_to_rank( + witness_list_t *witnesses, witness_rank_t rank_inclusive) { + unsigned d = 0; witness_t *w = ql_last(witnesses, link); if (w != NULL) { @@ -274,7 +280,7 @@ witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, } witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; - unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); if (d != depth) { witness_depth_error(witnesses, rank_inclusive, depth); @@ -292,14 +298,14 @@ witness_assert_lockless(witness_tsdn_t *witness_tsdn) { } static inline void -witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, - witness_rank_t rank_inclusive) { +witness_assert_positive_depth_to_rank( + witness_tsdn_t *witness_tsdn, witness_rank_t rank_inclusive) { if (!config_debug || witness_tsdn_null(witness_tsdn)) { return; } witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; - unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); if (d == 0) { witness_depth_error(witnesses, rank_inclusive, 1); @@ -308,9 +314,9 @@ witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, static inline void witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { - witness_tsd_t *witness_tsd; + witness_tsd_t *witness_tsd; witness_list_t *witnesses; - witness_t *w; + witness_t *w; if (!config_debug) { return; @@ -335,9 +341,9 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { } else if (w->rank > witness->rank) { /* Not forking, rank order reversal. 
*/ witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > - 0)) { + } else if (w->rank == witness->rank + && (w->comp == NULL || w->comp != witness->comp + || w->comp(w, w->opaque, witness, witness->opaque) > 0)) { /* * Missing/incompatible comparison function, or comparison * function indicates rank order reversal. @@ -346,15 +352,15 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { } /* Suppress spurious warning from static analysis */ - assert(ql_empty(witnesses) || - qr_prev(ql_first(witnesses), link) != NULL); + assert( + ql_empty(witnesses) || qr_prev(ql_first(witnesses), link) != NULL); ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); } static inline void witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) { - witness_tsd_t *witness_tsd; + witness_tsd_t *witness_tsd; witness_list_t *witnesses; if (!config_debug) { diff --git a/include/msvc_compat/C99/stdint.h b/include/msvc_compat/C99/stdint.h index c66fbb81..5ee3992b 100644 --- a/include/msvc_compat/C99/stdint.h +++ b/include/msvc_compat/C99/stdint.h @@ -30,39 +30,39 @@ /////////////////////////////////////////////////////////////////////////////// #ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" +# error "Use this header only with Microsoft Visual C++ compilers!" 
#endif // _MSC_VER ] #ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ +# define _MSC_STDINT_H_ -#if _MSC_VER > 1000 -#pragma once -#endif +# if _MSC_VER > 1000 +# pragma once +# endif -#include +# include // For Visual Studio 6 in C++ mode and for many Visual Studio versions when // compiling for ARM we should wrap include with 'extern "C++" {}' // or compiler give many errors like this: // error C2733: second C linkage of overloaded function 'wmemchr' not allowed -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif -# include -#ifdef __cplusplus +# endif +# include +# ifdef __cplusplus } -#endif +# endif // Define _W64 macros to mark types changing their size, like intptr_t. -#ifndef _W64 -# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 -# define _W64 __w64 -# else -# define _W64 -# endif -#endif - +# ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) \ + && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +# endif // 7.18.1 Integer types @@ -71,177 +71,177 @@ extern "C" { // Visual Studio 6 and Embedded Visual C++ 4 doesn't // realize that, e.g. char has the same size as __int8 // so we give up on __intX for them. 
-#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - +# if (_MSC_VER < 1300) +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +# else +typedef signed __int8 int8_t; +typedef signed __int16 int16_t; +typedef signed __int32 int32_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +# endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; // 7.18.1.2 Minimum-width integer types -typedef int8_t int_least8_t; -typedef int16_t int_least16_t; -typedef int32_t int_least32_t; -typedef int64_t int_least64_t; -typedef uint8_t uint_least8_t; -typedef uint16_t uint_least16_t; -typedef uint32_t uint_least32_t; -typedef uint64_t uint_least64_t; +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; // 7.18.1.3 Fastest minimum-width integer types -typedef int8_t int_fast8_t; -typedef int16_t int_fast16_t; -typedef int32_t int_fast32_t; -typedef int64_t int_fast64_t; -typedef uint8_t uint_fast8_t; -typedef uint16_t uint_fast16_t; -typedef uint32_t uint_fast32_t; -typedef uint64_t uint_fast64_t; +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; 
+typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; // 7.18.1.4 Integer types capable of holding object pointers -#ifdef _WIN64 // [ - typedef signed __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -#else // _WIN64 ][ - typedef _W64 signed int intptr_t; - typedef _W64 unsigned int uintptr_t; -#endif // _WIN64 ] +# ifdef _WIN64 // [ +typedef signed __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +# else // _WIN64 ][ +typedef _W64 signed int intptr_t; +typedef _W64 unsigned int uintptr_t; +# endif // _WIN64 ] // 7.18.1.5 Greatest-width integer types -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; // 7.18.2 Limits of specified-width integer types -#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 +# if !defined(__cplusplus) \ + || defined( \ + __STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 // 7.18.2.1 Limits of exact-width integer types -#define INT8_MIN ((int8_t)_I8_MIN) -#define INT8_MAX _I8_MAX -#define INT16_MIN ((int16_t)_I16_MIN) -#define INT16_MAX _I16_MAX -#define INT32_MIN ((int32_t)_I32_MIN) -#define INT32_MAX _I32_MAX -#define INT64_MIN ((int64_t)_I64_MIN) -#define INT64_MAX _I64_MAX -#define UINT8_MAX _UI8_MAX -#define UINT16_MAX _UI16_MAX -#define UINT32_MAX _UI32_MAX -#define UINT64_MAX _UI64_MAX +# define INT8_MIN ((int8_t)_I8_MIN) +# define INT8_MAX _I8_MAX +# define INT16_MIN ((int16_t)_I16_MIN) +# define INT16_MAX _I16_MAX +# define INT32_MIN ((int32_t)_I32_MIN) +# define INT32_MAX _I32_MAX +# define INT64_MIN ((int64_t)_I64_MIN) +# define INT64_MAX _I64_MAX +# define UINT8_MAX _UI8_MAX +# define UINT16_MAX _UI16_MAX +# define UINT32_MAX _UI32_MAX +# define UINT64_MAX _UI64_MAX // 7.18.2.2 Limits of minimum-width integer types -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST8_MAX 
INT8_MAX -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MIN INT64_MIN -#define INT_LEAST64_MAX INT64_MAX -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST8_MAX INT8_MAX +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST16_MAX INT16_MAX +# define INT_LEAST32_MIN INT32_MIN +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST64_MIN INT64_MIN +# define INT_LEAST64_MAX INT64_MAX +# define UINT_LEAST8_MAX UINT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define UINT_LEAST64_MAX UINT64_MAX // 7.18.2.3 Limits of fastest minimum-width integer types -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MIN INT16_MIN -#define INT_FAST16_MAX INT16_MAX -#define INT_FAST32_MIN INT32_MIN -#define INT_FAST32_MAX INT32_MAX -#define INT_FAST64_MIN INT64_MIN -#define INT_FAST64_MAX INT64_MAX -#define UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX UINT16_MAX -#define UINT_FAST32_MAX UINT32_MAX -#define UINT_FAST64_MAX UINT64_MAX +# define INT_FAST8_MIN INT8_MIN +# define INT_FAST8_MAX INT8_MAX +# define INT_FAST16_MIN INT16_MIN +# define INT_FAST16_MAX INT16_MAX +# define INT_FAST32_MIN INT32_MIN +# define INT_FAST32_MAX INT32_MAX +# define INT_FAST64_MIN INT64_MIN +# define INT_FAST64_MAX INT64_MAX +# define UINT_FAST8_MAX UINT8_MAX +# define UINT_FAST16_MAX UINT16_MAX +# define UINT_FAST32_MAX UINT32_MAX +# define UINT_FAST64_MAX UINT64_MAX // 7.18.2.4 Limits of integer types capable of holding object pointers -#ifdef _WIN64 // [ -# define INTPTR_MIN INT64_MIN -# define INTPTR_MAX INT64_MAX -# define UINTPTR_MAX UINT64_MAX -#else // _WIN64 ][ -# define INTPTR_MIN INT32_MIN -# define INTPTR_MAX INT32_MAX -# define UINTPTR_MAX UINT32_MAX -#endif 
// _WIN64 ] +# ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +# else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +# endif // _WIN64 ] // 7.18.2.5 Limits of greatest-width integer types -#define INTMAX_MIN INT64_MIN -#define INTMAX_MAX INT64_MAX -#define UINTMAX_MAX UINT64_MAX +# define INTMAX_MIN INT64_MIN +# define INTMAX_MAX INT64_MAX +# define UINTMAX_MAX UINT64_MAX // 7.18.3 Limits of other integer types -#ifdef _WIN64 // [ -# define PTRDIFF_MIN _I64_MIN -# define PTRDIFF_MAX _I64_MAX -#else // _WIN64 ][ -# define PTRDIFF_MIN _I32_MIN -# define PTRDIFF_MAX _I32_MAX -#endif // _WIN64 ] +# ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +# else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +# endif // _WIN64 ] -#define SIG_ATOMIC_MIN INT_MIN -#define SIG_ATOMIC_MAX INT_MAX +# define SIG_ATOMIC_MIN INT_MIN +# define SIG_ATOMIC_MAX INT_MAX -#ifndef SIZE_MAX // [ -# ifdef _WIN64 // [ -# define SIZE_MAX _UI64_MAX -# else // _WIN64 ][ -# define SIZE_MAX _UI32_MAX -# endif // _WIN64 ] -#endif // SIZE_MAX ] +# ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +# endif // SIZE_MAX ] // WCHAR_MIN and WCHAR_MAX are also defined in -#ifndef WCHAR_MIN // [ -# define WCHAR_MIN 0 -#endif // WCHAR_MIN ] -#ifndef WCHAR_MAX // [ -# define WCHAR_MAX _UI16_MAX -#endif // WCHAR_MAX ] +# ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +# endif // WCHAR_MIN ] +# ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +# endif // WCHAR_MAX ] -#define WINT_MIN 0 -#define WINT_MAX _UI16_MAX - -#endif // __STDC_LIMIT_MACROS ] +# define WINT_MIN 0 +# define WINT_MAX _UI16_MAX +# endif // __STDC_LIMIT_MACROS ] // 7.18.4 Limits of other integer types -#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See 
footnote 224 at page 260 +# if !defined(__cplusplus) \ + || defined( \ + __STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 // 7.18.4.1 Macros for minimum-width integer constants -#define INT8_C(val) val##i8 -#define INT16_C(val) val##i16 -#define INT32_C(val) val##i32 -#define INT64_C(val) val##i64 +# define INT8_C(val) val##i8 +# define INT16_C(val) val##i16 +# define INT32_C(val) val##i32 +# define INT64_C(val) val##i64 -#define UINT8_C(val) val##ui8 -#define UINT16_C(val) val##ui16 -#define UINT32_C(val) val##ui32 -#define UINT64_C(val) val##ui64 +# define UINT8_C(val) val##ui8 +# define UINT16_C(val) val##ui16 +# define UINT32_C(val) val##ui32 +# define UINT64_C(val) val##ui64 // 7.18.4.2 Macros for greatest-width integer constants -#define INTMAX_C INT64_C -#define UINTMAX_C UINT64_C - -#endif // __STDC_CONSTANT_MACROS ] +# define INTMAX_C INT64_C +# define UINTMAX_C UINT64_C +# endif // __STDC_CONSTANT_MACROS ] #endif // _MSC_STDINT_H_ ] diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index 996f256c..6a1acc0f 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -4,9 +4,10 @@ /* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided * for both */ #ifdef _MSC_VER -# include -# pragma intrinsic(_BitScanForward) -static __forceinline int ffsl(long x) { +# include +# pragma intrinsic(_BitScanForward) +static __forceinline int +ffsl(long x) { unsigned long i; if (_BitScanForward(&i, x)) { @@ -15,44 +16,46 @@ static __forceinline int ffsl(long x) { return 0; } -static __forceinline int ffs(int x) { +static __forceinline int +ffs(int x) { return ffsl(x); } -# ifdef _M_X64 -# pragma intrinsic(_BitScanForward64) -# endif +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif -static __forceinline int ffsll(unsigned __int64 x) { +static __forceinline int +ffsll(unsigned __int64 x) { unsigned long i; -#ifdef _M_X64 +# ifdef _M_X64 if (_BitScanForward64(&i, x)) { return i + 1; } return 0; -#else -// Fallback for 32-bit build where 64-bit version not available -// assuming little endian +# else + // Fallback for 32-bit build where 64-bit version not available + // assuming little endian union { unsigned __int64 ll; - unsigned long l[2]; + unsigned long l[2]; } s; s.ll = x; if (_BitScanForward(&i, s.l[0])) { return i + 1; - } else if(_BitScanForward(&i, s.l[1])) { + } else if (_BitScanForward(&i, s.l[1])) { return i + 33; } return 0; -#endif +# endif } #else -# define ffsll(x) __builtin_ffsll(x) -# define ffsl(x) __builtin_ffsl(x) -# define ffs(x) __builtin_ffs(x) +# define ffsll(x) __builtin_ffsll(x) +# define ffsl(x) __builtin_ffsl(x) +# define ffs(x) __builtin_ffs(x) #endif #endif /* strings_h */ diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp index 6eed028d..e709c177 100644 --- a/msvc/test_threads/test_threads.cpp +++ b/msvc/test_threads/test_threads.cpp @@ -12,78 +12,108 @@ #define JEMALLOC_NO_DEMANGLE #include -using std::vector; +using std::minstd_rand; using std::thread; using std::uniform_int_distribution; -using std::minstd_rand; +using std::vector; -int test_threads() { - je_malloc_conf = "narenas:3"; - int 
narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated1; - size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); - printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated2; - je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); - size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %zd bytes\n", leaked); - bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) - printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); - printf("\nPress Enter to continue...\n"); - getchar(); - return failed ? 1 : 0; +int +test_threads() { + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = {7, 16, 32, 60, 91, 100, 120, 144, 169, 199, + 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, + 123123, 255265, 2333111}; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, + numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, + numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid = i]() { + uniform_int_distribution 
sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t *ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = + numAllocsMax + - sizeDist(rnd); + for (int j = 0; j < numAllocs; + j += 64) { + const int x = sizeDist( + rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t *) + je_malloc(sz); + if (!ptrs[j]) { + printf( + "Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", + sz, tid, i, + j, x); + exit(1); + } + for (int k = 0; k < sz; + k++) + ptrs[j][k] = tid + + k; + } + for (int j = 0; j < numAllocs; + j += 64) { + for (int k = 0, + sz = ptrsz[j]; + k < sz; k++) + if (ptrs[j][k] + != (uint8_t)(tid + + k)) { + printf( + "Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", + tid, + i, + j, + k, + ptrs[j] + [k], + (uint8_t)(tid + + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread &t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %zd bytes\n", leaked); + bool failed = leaked + > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 
1 : 0; } diff --git a/msvc/test_threads/test_threads_main.cpp b/msvc/test_threads/test_threads_main.cpp index 0a022fba..3e88c286 100644 --- a/msvc/test_threads/test_threads_main.cpp +++ b/msvc/test_threads/test_threads_main.cpp @@ -5,7 +5,8 @@ using namespace std::chrono_literals; -int main(int argc, char** argv) { - int rc = test_threads(); - return rc; +int +main(int argc, char **argv) { + int rc = test_threads(); + return rc; } diff --git a/src/arena.c b/src/arena.c index 1586ee91..2f58b038 100644 --- a/src/arena.c +++ b/src/arena.c @@ -22,12 +22,7 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS * options and mallctl processing are straightforward. */ const char *const percpu_arena_mode_names[] = { - "percpu", - "phycpu", - "disabled", - "percpu", - "phycpu" -}; + "percpu", "phycpu", "disabled", "percpu", "phycpu"}; percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT; ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; @@ -36,7 +31,7 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -emap_t arena_emap_global; +emap_t arena_emap_global; static pa_central_t arena_pa_central_global; div_info_t arena_binind_div_info[SC_NBINS]; @@ -51,14 +46,15 @@ uint32_t arena_bin_offsets[SC_NBINS]; * that,the huge_arena_ind is updated to point to the actual huge arena, * which is the last one of the auto arenas. 
*/ -unsigned huge_arena_ind = 0; -bool opt_huge_arena_pac_thp = false; +unsigned huge_arena_ind = 0; +bool opt_huge_arena_pac_thp = false; pac_thp_t huge_arena_pac_thp = {.thp_madvise = false, - .auto_thp_switched = false, .n_thp_lazy = ATOMIC_INIT(0)}; + .auto_thp_switched = false, + .n_thp_lazy = ATOMIC_INIT(0)}; const arena_config_t arena_config_default = { - /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, - /* .metadata_use_hooks = */ true, + /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, + /* .metadata_use_hooks = */ true, }; /******************************************************************************/ @@ -67,13 +63,12 @@ const arena_config_t arena_config_default = { * definition. */ -static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread, bool all); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin); -static void -arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new); +static bool arena_decay_dirty( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +static void arena_bin_lower_slab( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +static void arena_maybe_do_deferred_work( + tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); /******************************************************************************/ @@ -92,8 +87,8 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { + bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, + hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, 
arena, nthreads, dss, dirty_decay_ms, @@ -135,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); - locked_inc_u64_unsynchronized(&lstats[i].nrequests, - nmalloc + nrequests); + locked_inc_u64_unsynchronized( + &lstats[i].nrequests, nmalloc + nrequests); astats->nrequests_large += nmalloc + nrequests; /* nfill == nmalloc for large currently. */ @@ -172,7 +167,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->tcache_stashed_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; - ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { + ql_foreach (descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; if (cache_bin_disabled(cache_bin)) { @@ -180,10 +175,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } cache_bin_sz_t ncached, nstashed; - cache_bin_nitems_get_remote(cache_bin, &ncached, &nstashed); + cache_bin_nitems_get_remote( + cache_bin, &ncached, &nstashed); astats->tcache_bytes += ncached * sz_index2size(i); - astats->tcache_stashed_bytes += nstashed * - sz_index2size(i); + astats->tcache_stashed_bytes += nstashed + * sz_index2size(i); } } malloc_mutex_prof_read(tsdn, @@ -191,19 +187,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); -#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ - malloc_mutex_lock(tsdn, &arena->mtx); \ - malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[ind], \ - &arena->mtx); \ - malloc_mutex_unlock(tsdn, &arena->mtx); +#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ + malloc_mutex_lock(tsdn, &arena->mtx); \ + malloc_mutex_prof_read( \ + tsdn, &astats->mutex_prof_data[ind], &arena->mtx); \ + 
malloc_mutex_unlock(tsdn, &arena->mtx); /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(base->mtx, - arena_prof_mutex_base); + READ_ARENA_MUTEX_PROF_DATA(base->mtx, arena_prof_mutex_base); #undef READ_ARENA_MUTEX_PROF_DATA - pa_shard_mtx_stats_read(tsdn, &arena->pa_shard, - astats->mutex_prof_data); + pa_shard_mtx_stats_read( + tsdn, &arena->pa_shard, astats->mutex_prof_data); nstime_copy(&astats->uptime, &arena->create_time); nstime_update(&astats->uptime); @@ -211,32 +206,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_stats_merge(tsdn, &bstats[i], - arena_get_bin(arena, i, j)); + bin_stats_merge( + tsdn, &bstats[i], arena_get_bin(arena, i, j)); } } } static void -arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread) { +arena_background_thread_inactivity_check( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread) { if (!background_thread_enabled() || is_background_thread) { return; } - background_thread_info_t *info = - arena_background_thread_info_get(arena); + background_thread_info_t *info = arena_background_thread_info_get( + arena); if (background_thread_indefinite_sleep(info)) { - arena_maybe_do_deferred_work(tsdn, arena, - &arena->pa_shard.pac.decay_dirty, 0); + arena_maybe_do_deferred_work( + tsdn, arena, &arena->pa_shard.pac.decay_dirty, 0); } } /* * React to deferred work generated by a PAI function. 
*/ -void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); +void +arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) { arena_decay_dirty(tsdn, arena, false, true); @@ -246,34 +242,34 @@ void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { static void * arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { - void *ret; + void *ret; slab_data_t *slab_data = edata_slab_data_get(slab); - size_t regind; + size_t regind; assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((byte_t *)edata_addr_get(slab) + - (uintptr_t)(bin_info->reg_size * regind)); + ret = (void *)((byte_t *)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); edata_nfree_dec(slab); return ret; } static void -arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, - unsigned cnt, void** ptrs) { +arena_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { slab_data_t *slab_data = edata_slab_data_get(slab); assert(edata_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); -#if (! 
defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) +#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { - size_t regind = bitmap_sfu(slab_data->bitmap, - &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + - (uintptr_t)(bin_info->reg_size * regind)); + size_t regind = bitmap_sfu( + slab_data->bitmap, &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); } #else unsigned group = 0; @@ -327,10 +323,9 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t hindex = index - SC_NBINS; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].nmalloc, 1); + &arena->stats.lstats[hindex].nmalloc, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); + &arena->stats.lstats[hindex].active_bytes, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -353,30 +348,29 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t hindex = index - SC_NBINS; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].ndalloc, 1); + &arena->stats.lstats[hindex].ndalloc, 1); locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); + &arena->stats.lstats[hindex].active_bytes, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } static void -arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, - size_t usize) { +arena_large_ralloc_stats_update( + tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) { arena_large_malloc_stats_update(tsdn, arena, usize); arena_large_dalloc_stats_update(tsdn, arena, oldusize); } edata_t * -arena_extent_alloc_large(tsdn_t *tsdn, 
arena_t *arena, size_t usize, - size_t alignment, bool zero) { - bool deferred_work_generated = false; +arena_extent_alloc_large( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + bool deferred_work_generated = false; szind_t szind = sz_size2index(usize); - size_t esize = usize + sz_large_pad; + size_t esize = usize + sz_large_pad; - bool guarded = san_large_extent_decide_guard(tsdn, - arena_get_ehooks(arena), esize, alignment); + bool guarded = san_large_extent_decide_guard( + tsdn, arena_get_ehooks(arena), esize, alignment); /* * - if usize >= opt_calloc_madvise_threshold, @@ -406,7 +400,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, * if cache_oblivious is enabled. */ if (zero && !zero_override && !edata_zeroed_get(edata)) { - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t usize = edata_usize_get(edata); memset(addr, 0, usize); } @@ -417,14 +411,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - arena_large_dalloc_stats_update(tsdn, arena, - edata_usize_get(edata)); + arena_large_dalloc_stats_update( + tsdn, arena, edata_usize_get(edata)); } } void -arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t oldusize) { +arena_extent_ralloc_large_shrink( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); if (config_stats) { @@ -433,8 +427,8 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } void -arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t oldusize) { +arena_extent_ralloc_large_expand( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); if (config_stats) { @@ -459,12 +453,12 @@ arena_decide_unforced_purge_eagerness(bool 
is_background_thread) { } bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, - ssize_t decay_ms) { +arena_decay_ms_set( + tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) { pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( /* is_background_thread */ false); - return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms, - eagerness); + return pa_decay_ms_set( + tsdn, &arena->pa_shard, state, decay_ms, eagerness); } ssize_t @@ -474,8 +468,8 @@ arena_decay_ms_get(arena_t *arena, extent_state_t state) { static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - bool is_background_thread, bool all) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, + bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, @@ -488,10 +482,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - pac_purge_eagerness_t eagerness = - arena_decide_unforced_purge_eagerness(is_background_thread); - bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, - decay, decay_stats, ecache, eagerness); + pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( + is_background_thread); + bool epoch_advanced = pac_maybe_decay_purge( + tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, eagerness); size_t npages_new JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(0); if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ @@ -499,8 +493,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } malloc_mutex_unlock(tsdn, &decay->mtx); - if (have_background_thread && background_thread_enabled() && - epoch_advanced && !is_background_thread) { + if (have_background_thread && background_thread_enabled() + && epoch_advanced && !is_background_thread) { arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new); } @@ -508,16 +502,16 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } static bool -arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all) { +arena_decay_dirty( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } static bool -arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all) { +arena_decay_muzzy( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } @@ -564,13 +558,13 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } nstime_subtract(remaining_sleep, &decay->epoch); if (npages_new > 0) { - uint64_t npurge_new = decay_npages_purge_in(decay, - remaining_sleep, npages_new); + uint64_t npurge_new = decay_npages_purge_in( + decay, remaining_sleep, npages_new); info->npages_to_purge_new += npurge_new; } malloc_mutex_unlock(tsdn, &decay->mtx); - return info->npages_to_purge_new > - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; + return info->npages_to_purge_new + > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; } /* @@ -582,8 +576,8 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * deferred work has been generated. 
*/ static void -arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new) { +arena_maybe_do_deferred_work( + tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new) { background_thread_info_t *info = arena_background_thread_info_get( arena); if (malloc_mutex_trylock(tsdn, &info->mtx)) { @@ -603,7 +597,7 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (background_thread_indefinite_sleep(info)) { background_thread_wakeup_early(info, NULL); } else if (arena_should_decay_early(tsdn, arena, decay, info, - &remaining_sleep, npages_new)) { + &remaining_sleep, npages_new)) { info->npages_to_purge_new = 0; background_thread_wakeup_early(info, &remaining_sleep); } @@ -687,8 +681,8 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { if (arena_bin_has_batch(binind)) { bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init(&batched_bin->remote_frees, - BIN_REMOTE_FREE_ELEMS_MAX); + batcher_init( + &batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX); } if (bin->slabcur != NULL) { @@ -743,8 +737,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(ptr != NULL); size_t usize = isalloc(tsdn, ptr); size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); - assert(bumped_usize <= SC_LARGE_MINCLASS && - PAGE_CEILING(bumped_usize) == bumped_usize); + assert(bumped_usize <= SC_LARGE_MINCLASS + && PAGE_CEILING(bumped_usize) == bumped_usize); assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); szind_t szind = sz_size2index(bumped_usize); @@ -757,8 +751,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { } static void -arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path, edata_t *edata) { +arena_dalloc_promoted_impl( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path, edata_t *edata) { cassert(config_prof); assert(opt_prof); @@ -772,20 +766,20 @@ 
arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, safety_check_verify_redzone(ptr, usize, bumped_usize); } szind_t bumped_ind = sz_size2index(bumped_usize); - if (bumped_usize >= SC_LARGE_MINCLASS && - tcache != NULL && bumped_ind < TCACHE_NBINS_MAX && - !tcache_bin_disabled(bumped_ind, &tcache->bins[bumped_ind], - tcache->tcache_slow)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, bumped_ind, - slow_path); + if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL + && bumped_ind < TCACHE_NBINS_MAX + && !tcache_bin_disabled( + bumped_ind, &tcache->bins[bumped_ind], tcache->tcache_slow)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); } else { large_dalloc(tsdn, edata); } } void -arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path) { +arena_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); } @@ -810,14 +804,14 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); for (edata_t *edata = edata_list_active_first(&arena->large); - edata != NULL; edata = edata_list_active_first(&arena->large)) { - void *ptr = edata_base_get(edata); + edata != NULL; edata = edata_list_active_first(&arena->large)) { + void *ptr = edata_base_get(edata); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { @@ -841,16 +835,16 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. 
*/ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j), - i); + arena_bin_reset( + tsd, arena, arena_get_bin(arena, i, j), i); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } static void -arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes, - unsigned n_mtx) { +arena_prepare_base_deletion_sync_finish( + tsd_t *tsd, malloc_mutex_t **mutexes, unsigned n_mtx) { for (unsigned i = 0; i < n_mtx; i++) { malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]); malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]); @@ -909,9 +903,9 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { unsigned destroy_ind = base_ind_get(base_to_destroy); assert(destroy_ind >= manual_arena_base); - tsdn_t *tsdn = tsd_tsdn(tsd); + tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX]; - unsigned n_delayed = 0, total = narenas_total_get(); + unsigned n_delayed = 0, total = narenas_total_get(); for (unsigned i = 0; i < total; i++) { if (i == destroy_ind) { continue; @@ -921,12 +915,12 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { continue; } pac_t *pac = &arena->pa_shard.pac; - arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx, - delayed_mtx, &n_delayed); - arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx, - delayed_mtx, &n_delayed); - arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx, - delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_dirty.mtx, delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_muzzy.mtx, delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_retained.mtx, delayed_mtx, &n_delayed); } arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); } @@ -968,17 +962,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { } static edata_t * -arena_slab_alloc(tsdn_t 
*tsdn, arena_t *arena, szind_t binind, unsigned binshard, - const bin_info_t *bin_info) { +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned binshard, const bin_info_t *bin_info) { bool deferred_work_generated = false; - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = san_slab_extent_decide_guard(tsdn, - arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard( + tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, - /* zero */ false, guarded, &deferred_work_generated); + /* zero */ false, guarded, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); @@ -1024,15 +1018,15 @@ static void * arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *fresh_slab) { malloc_mutex_assert_owner(tsdn, &bin->lock); - arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind, - fresh_slab); + arena_bin_refill_slabcur_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); } static bool -arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, - bin_t *bin) { +arena_bin_refill_slabcur_no_fresh_slab( + tsdn_t *tsdn, arena_t *arena, bin_t *bin) { malloc_mutex_assert_owner(tsdn, &bin->lock); /* Only called after arena_slab_reg_alloc[_batch] failed. 
*/ assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); @@ -1049,8 +1043,8 @@ arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, } bin_t * -arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, - unsigned *binshard_p) { +arena_bin_choose( + tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard_p) { unsigned binshard; if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { binshard = 0; @@ -1065,8 +1059,8 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, } void -arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, +arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, + szind_t binind, const cache_bin_sz_t nfill_min, const cache_bin_sz_t nfill_max) { assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill_min > 0 && nfill_min <= nfill_max); @@ -1102,12 +1096,12 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, * local exhausted, b) unlock and slab_alloc returns null, c) re-lock * and bin local fails again. */ - bool made_progress = true; - edata_t *fresh_slab = NULL; - bool alloc_and_retry = false; + bool made_progress = true; + edata_t *fresh_slab = NULL; + bool alloc_and_retry = false; cache_bin_sz_t filled = 0; - unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); /* * This has some fields that are conditionally initialized down batch @@ -1120,7 +1114,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); - arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); + arena_bin_flush_batch_after_lock( + tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill_min) { /* Try batch-fill from slabcur first. 
*/ @@ -1136,8 +1131,8 @@ label_refill: cnt = nfill_min - filled; } - arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, - &ptrs.ptr[filled]); + arena_slab_reg_alloc_batch( + slabcur, bin_info, cnt, &ptrs.ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1150,8 +1145,8 @@ label_refill: /* Then see if a new slab was reserved already. */ if (fresh_slab != NULL) { - arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, - bin, binind, fresh_slab); + arena_bin_refill_slabcur_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); assert(bin->slabcur != NULL); fresh_slab = NULL; continue; @@ -1181,27 +1176,27 @@ label_refill: cache_bin->tstats.nrequests = 0; } - arena_bin_flush_batch_before_unlock(tsdn, arena, bin, binind, - &batch_flush_state); + arena_bin_flush_batch_before_unlock( + tsdn, arena, bin, binind, &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); - arena_bin_flush_batch_after_unlock(tsdn, arena, bin, binind, - &batch_flush_state); + arena_bin_flush_batch_after_unlock( + tsdn, arena, bin, binind, &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); assert(filled < nfill_min); assert(made_progress); - fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, - bin_info); + fresh_slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info); /* fresh_slab NULL case handled in the for loop. */ alloc_and_retry = false; made_progress = false; goto label_refill; } - assert((filled >= nfill_min && filled <= nfill_max) || - (fresh_slab == NULL && !made_progress)); + assert((filled >= nfill_min && filled <= nfill_max) + || (fresh_slab == NULL && !made_progress)); /* Release if allocated but not used. 
*/ if (fresh_slab != NULL) { @@ -1219,22 +1214,24 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; - const size_t nregs = bin_info->nregs; + const size_t nregs = bin_info->nregs; assert(nregs > 0); const size_t usize = bin_info->reg_size; const bool manual_arena = !arena_is_auto(arena); - unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); - size_t nslab = 0; - size_t filled = 0; - edata_t *slab = NULL; + size_t nslab = 0; + size_t filled = 0; + edata_t *slab = NULL; edata_list_active_t fulls; edata_list_active_init(&fulls); - while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind, - binshard, bin_info)) != NULL) { + while (filled < nfill + && (slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info)) + != NULL) { assert((size_t)edata_nfree_get(slab) == nregs); ++nslab; size_t batch = nfill - filled; @@ -1242,8 +1239,8 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, batch = nregs; } assert(batch > 0); - arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, - &ptrs[filled]); + arena_slab_reg_alloc_batch( + slab, bin_info, (unsigned)batch, &ptrs[filled]); assert(edata_addr_get(slab) == ptrs[filled]); if (zero) { memset(ptrs[filled], 0, batch * usize); @@ -1287,8 +1284,8 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, * bin->slabcur if necessary. 
*/ static void * -arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind) { +arena_bin_malloc_no_fresh_slab( + tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { malloc_mutex_assert_owner(tsdn, &bin->lock); if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { @@ -1304,18 +1301,18 @@ static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; - size_t usize = sz_index2size(binind); - unsigned binshard; + size_t usize = sz_index2size(binind); + unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; - void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, - bin_info); + fresh_slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); /* Retry since the lock was dropped. 
*/ @@ -1326,8 +1323,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_unlock(tsdn, &bin->lock); return NULL; } - ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin, - binind, fresh_slab); + ret = arena_bin_malloc_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); fresh_slab = NULL; } } @@ -1390,7 +1387,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(alignment <= CACHELINE)) { return large_malloc(tsdn, arena, usize, zero); } else { - return large_palloc(tsdn, arena, usize, alignment, zero); + return large_palloc( + tsdn, arena, usize, alignment, zero); } } } @@ -1401,7 +1399,7 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = edata_szind_get(slab); + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; /* @@ -1418,8 +1416,7 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin) { +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { assert(edata_nfree_get(slab) > 0); /* @@ -1455,24 +1452,24 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { } void -arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin) { +arena_dalloc_bin_locked_handle_newly_empty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { arena_dissociate_bin_slab(arena, slab, bin); arena_dalloc_bin_slab_prepare(tsdn, slab, bin); } void -arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin) { +arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { arena_bin_slabs_full_remove(arena, bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); } static void 
arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { - szind_t binind = edata_szind_get(edata); + szind_t binind = edata_szind_get(edata); unsigned binshard = edata_binshard_get(edata); - bin_t *bin = arena_get_bin(arena, binind, binshard); + bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; @@ -1515,16 +1512,15 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); - if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min - <= SC_SMALL_MAXCLASS)) { + if (likely(oldsize <= SC_SMALL_MAXCLASS + && usize_min <= SC_SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. */ - assert(bin_infos[sz_size2index(oldsize)].reg_size == - oldsize); + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SC_SMALL_MAXCLASS - || sz_size2index(usize_max) != sz_size2index(oldsize)) + || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { ret = true; goto done; @@ -1535,8 +1531,8 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { - ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max, - zero); + ret = large_ralloc_no_move( + tsdn, edata, usize_min, usize_max, zero); } else { ret = true; } @@ -1558,8 +1554,8 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, arena); + return ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); } void * @@ -1575,37 +1571,38 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, assert(sz_can_use_slab(usize)); /* Try to 
avoid moving the allocation. */ UNUSED size_t newsize; - if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, - &newsize)) { + if (!arena_ralloc_no_move( + tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { hook_invoke_expand(hook_args->is_realloc - ? hook_expand_realloc : hook_expand_rallocx, + ? hook_expand_realloc + : hook_expand_rallocx, ptr, oldsize, usize, (uintptr_t)ptr, hook_args->args); return ptr; } } - if (oldsize >= SC_LARGE_MINCLASS - && usize >= SC_LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, ptr, usize, - alignment, zero, tcache, hook_args); + if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { + return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, + tcache, hook_args); } /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and copying. */ - void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, slab, tcache); + void *ret = arena_ralloc_move_helper( + tsdn, arena, usize, alignment, zero, slab, tcache); if (ret == NULL) { return NULL; } - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx, + ret, (uintptr_t)ret, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? 
hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); /* * Junk/zero-filling were already done by @@ -1623,8 +1620,8 @@ arena_get_ehooks(arena_t *arena) { } extent_hooks_t * -arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks) { +arena_set_extent_hooks( + tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) { background_thread_info_t *info; if (have_background_thread) { info = arena_background_thread_info_get(arena); @@ -1699,11 +1696,11 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { } bool -arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, - size_t *new_limit) { +arena_retain_grow_limit_get_set( + tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - return pac_retain_grow_limit_get_set(tsd_tsdn(tsd), - &arena->pa_shard.pac, old_limit, new_limit); + return pac_retain_grow_limit_get_set( + tsd_tsdn(tsd), &arena->pa_shard.pac, old_limit, new_limit); } unsigned @@ -1724,7 +1721,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) { arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; - base_t *base; + base_t *base; if (ind == 0) { base = b0get(); @@ -1736,8 +1733,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } } - size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + - sizeof(bin_with_batch_t) * bin_info_nbatched_bins + size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + + sizeof(bin_with_batch_t) * bin_info_nbatched_bins + sizeof(bin_t) * bin_info_nunbatched_bins; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { @@ -1756,27 +1753,27 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", - WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { + 
WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; } } - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), - ATOMIC_RELAXED); + atomic_store_u( + &arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); edata_list_active_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", - WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; } nstime_t cur_time; nstime_init_update(&cur_time); if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global, - &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, - LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, - arena_dirty_decay_ms_default_get(), - arena_muzzy_decay_ms_default_get())) { + &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, + LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, + arena_dirty_decay_ms_default_get(), + arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -1785,7 +1782,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin, i); + bool err = bin_init(bin, i); if (err) { goto label_error; } @@ -1814,8 +1811,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, + &opt_hpa_sec_opts)) { goto label_error; } } @@ -1866,13 +1863,13 @@ arena_create_huge_arena(tsd_t *tsd, unsigned ind) { */ if (!background_thread_enabled() && arena_dirty_decay_ms_default_get() > 0) 
{ - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_dirty, 0); + arena_decay_ms_set( + tsd_tsdn(tsd), huge_arena, extent_state_dirty, 0); } if (!background_thread_enabled() - &&arena_muzzy_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_muzzy, 0); + && arena_muzzy_decay_ms_default_get() > 0) { + arena_decay_ms_set( + tsd_tsdn(tsd), huge_arena, extent_state_muzzy, 0); } return huge_arena; @@ -1900,8 +1897,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { assert(huge_arena_ind == 0); /* The threshold should be large size class. */ - if (opt_oversize_threshold > SC_LARGE_MAXCLASS || - opt_oversize_threshold < SC_LARGE_MINCLASS) { + if (opt_oversize_threshold > SC_LARGE_MAXCLASS + || opt_oversize_threshold < SC_LARGE_MINCLASS) { opt_oversize_threshold = 0; oversize_threshold = SC_LARGE_MAXCLASS + PAGE; huge_enabled = false; @@ -1917,10 +1914,11 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { base_t *b0 = a0->base; /* Make sure that b0 thp auto-switch won't happen concurrently here. 
*/ malloc_mutex_lock(tsdn, &b0->mtx); - (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && - metadata_thp_enabled() && (opt_thp == thp_mode_default) && - (init_system_thp_mode == thp_mode_default); - (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; + (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp + && metadata_thp_enabled() && (opt_thp == thp_mode_default) + && (init_system_thp_mode == thp_mode_default); + (&huge_arena_pac_thp)->auto_thp_switched = + b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); @@ -1942,16 +1940,16 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { } JEMALLOC_SUPPRESS_WARN_ON_USAGE( - uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins); - ) + uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; uint32_t bin_sz = (i < bin_info_nbatched_sizes - ? sizeof(bin_with_batch_t) : sizeof(bin_t)); + ? sizeof(bin_with_batch_t) + : sizeof(bin_t)); cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; } - return pa_central_init(&arena_pa_central_global, base, hpa, - &hpa_hooks_default); + return pa_central_init( + &arena_pa_central_global, base, hpa, &hpa_hooks_default); } void diff --git a/src/background_thread.c b/src/background_thread.c index 511febac..2eb08dd2 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -11,15 +11,15 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* This option should be opt-in only. */ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ -bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; /* Used for thread creation, termination and stats. 
*/ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; -size_t n_background_threads; -size_t max_background_threads; +size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; @@ -32,11 +32,11 @@ static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, static void pthread_create_wrapper_init(void) { -#ifdef JEMALLOC_LAZY_LOCK +# ifdef JEMALLOC_LAZY_LOCK if (!isthreaded) { isthreaded = true; } -#endif +# endif } int @@ -47,9 +47,9 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, return pthread_create_fptr(thread, attr, start_routine, arg); } -#ifdef JEMALLOC_HAVE_DLSYM -#include -#endif +# ifdef JEMALLOC_HAVE_DLSYM +# include +# endif static bool pthread_create_fptr_init(void) { @@ -61,17 +61,18 @@ pthread_create_fptr_init(void) { * wrapper for pthread_create; and 2) application may define its own * wrapper as well (and can call malloc within the wrapper). 
*/ -#ifdef JEMALLOC_HAVE_DLSYM +# ifdef JEMALLOC_HAVE_DLSYM pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { pthread_create_fptr = dlsym(RTLD_DEFAULT, "pthread_create"); } -#else +# else pthread_create_fptr = NULL; -#endif +# endif if (pthread_create_fptr == NULL) { if (config_lazy_lock) { - malloc_write(": Error in dlsym(RTLD_NEXT, " + malloc_write( + ": Error in dlsym(RTLD_NEXT, " "\"pthread_create\")\n"); abort(); } else { @@ -85,21 +86,24 @@ pthread_create_fptr_init(void) { #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */ #ifndef JEMALLOC_BACKGROUND_THREAD -#define NOT_REACHED { not_reached(); } -bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED -bool background_threads_enable(tsd_t *tsd) NOT_REACHED -bool background_threads_disable(tsd_t *tsd) NOT_REACHED -bool background_thread_is_started(background_thread_info_t *info) NOT_REACHED -void background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep) NOT_REACHED -void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED -void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED -void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED -void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED -bool background_thread_stats_read(tsdn_t *tsdn, - background_thread_stats_t *stats) NOT_REACHED -void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED -#undef NOT_REACHED +# define NOT_REACHED \ + { not_reached(); } +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED + bool background_threads_enable(tsd_t *tsd) NOT_REACHED + bool background_threads_disable(tsd_t *tsd) NOT_REACHED + bool background_thread_is_started( + background_thread_info_t *info) NOT_REACHED + void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep) NOT_REACHED + void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED + void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED 
+ void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED + void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED + bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats) NOT_REACHED + void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED +# undef NOT_REACHED #else static bool background_thread_enabled_at_fork; @@ -116,49 +120,50 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) \ + || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; -#else -# ifndef __NetBSD__ +# else +# ifndef __NetBSD__ cpuset_t cpuset; -# else +# else cpuset_t *cpuset; -# endif -#endif +# endif +# endif -#ifndef __NetBSD__ +# ifndef __NetBSD__ CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); -#else +# else cpuset = cpuset_create(); -#endif +# endif -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) return (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0); -#else -# ifndef __NetBSD__ - int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), - &cpuset); -# else - int ret = pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset), - cpuset); +# else +# ifndef __NetBSD__ + int ret = pthread_setaffinity_np( + pthread_self(), sizeof(cpuset_t), &cpuset); +# else + int ret = pthread_setaffinity_np( + pthread_self(), cpuset_size(cpuset), cpuset); cpuset_destroy(cpuset); -# endif +# endif return ret != 0; -#endif -#else - return false; -#endif +# endif +# else + return false; +# endif } -#define BILLION UINT64_C(1000000000) +# define BILLION UINT64_C(1000000000) /* Minimal sleep interval 100 ms. 
*/ -#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +# define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) static int -background_thread_cond_wait(background_thread_info_t *info, - struct timespec *ts) { +background_thread_cond_wait( + background_thread_info_t *info, struct timespec *ts) { int ret; /* @@ -177,8 +182,8 @@ background_thread_cond_wait(background_thread_info_t *info, } static void -background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, - uint64_t interval) { +background_thread_sleep( + tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { if (config_stats) { info->tot_n_runs++; } @@ -192,21 +197,21 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, int ret; if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsdn, info, BACKGROUND_THREAD_INDEFINITE_SLEEP); ret = background_thread_cond_wait(info, NULL); assert(ret == 0); } else { - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && - interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS + && interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); /* We need malloc clock (can be different from tv). 
*/ nstime_t next_wakeup; nstime_init_update(&next_wakeup); nstime_iadd(&next_wakeup, interval); - assert(nstime_ns(&next_wakeup) < - BACKGROUND_THREAD_INDEFINITE_SLEEP); - background_thread_wakeup_time_set(tsdn, info, - nstime_ns(&next_wakeup)); + assert(nstime_ns(&next_wakeup) + < BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsdn, info, nstime_ns(&next_wakeup)); nstime_t ts_wakeup; nstime_copy(&ts_wakeup, &before_sleep); @@ -245,11 +250,11 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { } static inline void -background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, - unsigned ind) { +background_work_sleep_once( + tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); - bool slept_indefinitely = background_thread_indefinite_sleep(info); + bool slept_indefinitely = background_thread_indefinite_sleep(info); for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); @@ -279,11 +284,10 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) { sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP; } else { - sleep_ns = - (ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS) + sleep_ns = (ns_until_deferred + < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? 
BACKGROUND_THREAD_MIN_INTERVAL_NS : ns_until_deferred; - } background_thread_sleep(tsdn, info, sleep_ns); @@ -292,11 +296,11 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, static bool background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { if (info == &background_thread_info[0]) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), - &background_thread_lock); + malloc_mutex_assert_owner( + tsd_tsdn(tsd), &background_thread_lock); } else { - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), - &background_thread_lock); + malloc_mutex_assert_not_owner( + tsd_tsdn(tsd), &background_thread_lock); } pre_reentrancy(tsd, NULL); @@ -340,21 +344,23 @@ background_thread_create_signals_masked(pthread_t *thread, sigset_t set; sigfillset(&set); sigset_t oldset; - int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); + int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); if (mask_err != 0) { return mask_err; } - int create_err = pthread_create_wrapper(thread, attr, start_routine, - arg); + int create_err = pthread_create_wrapper( + thread, attr, start_routine, arg); /* * Restore the signal mask. Failure to restore the signal mask here * changes program behavior. 
*/ int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL); if (restore_err != 0) { - malloc_printf(": background thread creation " + malloc_printf( + ": background thread creation " "failed (%d), and signal mask restoration failed " - "(%d)\n", create_err, restore_err); + "(%d)\n", + create_err, restore_err); if (opt_abort) { abort(); } @@ -364,8 +370,8 @@ background_thread_create_signals_masked(pthread_t *thread, static bool check_background_thread_creation(tsd_t *tsd, - const size_t const_max_background_threads, - unsigned *n_created, bool *created_threads) { + const size_t const_max_background_threads, unsigned *n_created, + bool *created_threads) { bool ret = false; if (likely(*n_created == n_background_threads)) { return ret; @@ -391,7 +397,7 @@ check_background_thread_creation(tsd_t *tsd, pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); @@ -399,8 +405,10 @@ check_background_thread_creation(tsd_t *tsd, (*n_created)++; created_threads[i] = true; } else { - malloc_printf(": background thread " - "creation failed (%d)\n", err); + malloc_printf( + ": background thread " + "creation failed (%d)\n", + err); if (opt_abort) { abort(); } @@ -434,16 +442,17 @@ background_thread0_work(tsd_t *tsd) { /* Start working, and create more threads when asked. 
*/ unsigned n_created = 1; while (background_thread_info[0].state != background_thread_stopped) { - if (background_thread_pause_check(tsd_tsdn(tsd), - &background_thread_info[0])) { + if (background_thread_pause_check( + tsd_tsdn(tsd), &background_thread_info[0])) { continue; } - if (check_background_thread_creation(tsd, const_max_background_threads, - &n_created, (bool *)&created_threads)) { + if (check_background_thread_creation(tsd, + const_max_background_threads, &n_created, + (bool *)&created_threads)) { continue; } - background_work_sleep_once(tsd_tsdn(tsd), - &background_thread_info[0], 0); + background_work_sleep_once( + tsd_tsdn(tsd), &background_thread_info[0], 0); } /* @@ -460,8 +469,8 @@ background_thread0_work(tsd_t *tsd) { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); if (info->state != background_thread_stopped) { /* The thread was not created. */ - assert(info->state == - background_thread_started); + assert( + info->state == background_thread_started); n_background_threads--; info->state = background_thread_stopped; } @@ -477,14 +486,14 @@ background_work(tsd_t *tsd, unsigned ind) { background_thread_info_t *info = &background_thread_info[ind]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - background_thread_wakeup_time_set(tsd_tsdn(tsd), info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsd_tsdn(tsd), info, BACKGROUND_THREAD_INDEFINITE_SLEEP); if (ind == 0) { background_thread0_work(tsd); } else { while (info->state != background_thread_stopped) { - if (background_thread_pause_check(tsd_tsdn(tsd), - info)) { + if (background_thread_pause_check( + tsd_tsdn(tsd), info)) { continue; } background_work_sleep_once(tsd_tsdn(tsd), info, ind); @@ -499,11 +508,11 @@ static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; assert(thread_ind < max_background_threads); -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +# ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP 
pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) +# elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); -#endif +# endif if (opt_percpu_arena != percpu_arena_disabled) { set_current_thread_affinity((int)thread_ind); } @@ -513,8 +522,8 @@ background_thread_entry(void *ind_arg) { * turn triggers another background thread creation). */ background_work(tsd_internal_fetch(), thread_ind); - assert(pthread_equal(pthread_self(), - background_thread_info[thread_ind].thread)); + assert(pthread_equal( + pthread_self(), background_thread_info[thread_ind].thread)); return NULL; } @@ -538,8 +547,8 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - need_new_thread = background_thread_enabled() && - (info->state == background_thread_stopped); + need_new_thread = background_thread_enabled() + && (info->state == background_thread_stopped); if (need_new_thread) { background_thread_init(tsd, info); } @@ -564,13 +573,15 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { * background threads with the underlying pthread_create. */ int err = background_thread_create_signals_masked(&info->thread, NULL, - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); if (err != 0) { - malloc_printf(": arena 0 background thread creation " - "failed (%d)\n", err); + malloc_printf( + ": arena 0 background thread creation " + "failed (%d)\n", + err); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state = background_thread_stopped; n_background_threads--; @@ -612,12 +623,12 @@ background_threads_enable(tsd_t *tsd) { /* Mark the threads we need to create for thread 0. 
*/ unsigned narenas = narenas_total_get(); for (unsigned i = 1; i < narenas; i++) { - if (marked[i % max_background_threads] || - arena_get(tsd_tsdn(tsd), i, false) == NULL) { + if (marked[i % max_background_threads] + || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[ - i % max_background_threads]; + background_thread_info_t *info = + &background_thread_info[i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); @@ -635,8 +646,8 @@ background_threads_enable(tsd_t *tsd) { for (unsigned i = 0; i < narenas; i++) { arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); if (arena != NULL) { - pa_shard_set_deferral_allowed(tsd_tsdn(tsd), - &arena->pa_shard, true); + pa_shard_set_deferral_allowed( + tsd_tsdn(tsd), &arena->pa_shard, true); } } return false; @@ -648,8 +659,8 @@ background_threads_disable(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* Thread 0 will be responsible for terminating other threads. */ - if (background_threads_disable_single(tsd, - &background_thread_info[0])) { + if (background_threads_disable_single( + tsd, &background_thread_info[0])) { return true; } assert(n_background_threads == 0); @@ -657,8 +668,8 @@ background_threads_disable(tsd_t *tsd) { for (unsigned i = 0; i < narenas; i++) { arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); if (arena != NULL) { - pa_shard_set_deferral_allowed(tsd_tsdn(tsd), - &arena->pa_shard, false); + pa_shard_set_deferral_allowed( + tsd_tsdn(tsd), &arena->pa_shard, false); } } @@ -671,15 +682,15 @@ background_thread_is_started(background_thread_info_t *info) { } void -background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep) { +background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep) { /* * This is an optimization to increase batching. 
At this point * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. */ - if (remaining_sleep != NULL && nstime_ns(remaining_sleep) < - BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (remaining_sleep != NULL + && nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } pthread_cond_signal(&info->cond); @@ -701,8 +712,8 @@ background_thread_prefork1(tsdn_t *tsdn) { void background_thread_postfork_parent(tsdn_t *tsdn) { for (unsigned i = 0; i < max_background_threads; i++) { - malloc_mutex_postfork_parent(tsdn, - &background_thread_info[i].mtx); + malloc_mutex_postfork_parent( + tsdn, &background_thread_info[i].mtx); } malloc_mutex_postfork_parent(tsdn, &background_thread_lock); } @@ -710,8 +721,8 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void background_thread_postfork_child(tsdn_t *tsdn) { for (unsigned i = 0; i < max_background_threads; i++) { - malloc_mutex_postfork_child(tsdn, - &background_thread_info[i].mtx); + malloc_mutex_postfork_child( + tsdn, &background_thread_info[i].mtx); } malloc_mutex_postfork_child(tsdn, &background_thread_lock); if (!background_thread_enabled_at_fork) { @@ -760,8 +771,8 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); - malloc_mutex_prof_max_update(tsdn, - &stats->max_counter_per_bg_thd, &info->mtx); + malloc_mutex_prof_max_update( + tsdn, &stats->max_counter_per_bg_thd, &info->mtx); } malloc_mutex_unlock(tsdn, &info->mtx); } @@ -774,9 +785,9 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return false; } -#undef BACKGROUND_THREAD_NPAGES_THRESHOLD -#undef BILLION -#undef BACKGROUND_THREAD_MIN_INTERVAL_NS +# undef BACKGROUND_THREAD_NPAGES_THRESHOLD +# undef BILLION +# undef BACKGROUND_THREAD_MIN_INTERVAL_NS /* * When lazy lock is enabled, we need to 
make sure setting isthreaded before @@ -787,24 +798,24 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); -#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +# ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_create_fptr_init(); pthread_create_wrapper_init(); -#endif +# endif } #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ -bool -background_thread_boot0(void) { + bool background_thread_boot0(void) { if (!have_background_thread && opt_background_thread) { - malloc_printf(": option background_thread currently " + malloc_printf( + ": option background_thread currently " "supports pthread only\n"); return true; } #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - if ((config_lazy_lock || opt_background_thread) && - pthread_create_fptr_init()) { + if ((config_lazy_lock || opt_background_thread) + && pthread_create_fptr_init()) { return true; } #endif @@ -823,15 +834,15 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { max_background_threads = opt_max_background_threads; if (malloc_mutex_init(&background_thread_lock, - "background_thread_global", - WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, - malloc_mutex_rank_exclusive)) { + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { return true; } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - base, opt_max_background_threads * - sizeof(background_thread_info_t), CACHELINE); + base, opt_max_background_threads * sizeof(background_thread_info_t), + CACHELINE); if (background_thread_info == NULL) { return true; } @@ -840,8 +851,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { background_thread_info_t *info = &background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. 
*/ if (malloc_mutex_init(&info->mtx, "background_thread", - WITNESS_RANK_BACKGROUND_THREAD, - malloc_mutex_address_ordered)) { + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_address_ordered)) { return true; } if (pthread_cond_init(&info->cond, NULL)) { diff --git a/src/base.c b/src/base.c index 52f3d1d3..c494556c 100644 --- a/src/base.c +++ b/src/base.c @@ -12,7 +12,7 @@ * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. */ -#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD 2 #define BASE_AUTO_THP_THRESHOLD_A0 5 /******************************************************************************/ @@ -22,25 +22,21 @@ static base_t *b0; metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; -const char *const metadata_thp_mode_names[] = { - "disabled", - "auto", - "always" -}; +const char *const metadata_thp_mode_names[] = {"disabled", "auto", "always"}; /******************************************************************************/ static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() && - (init_system_thp_mode == thp_mode_default)); + return (metadata_thp_enabled() + && (init_system_thp_mode == thp_mode_default)); } static void * base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { void *addr; - bool zero = true; - bool commit = true; + bool zero = true; + bool commit = true; /* * Use huge page sizes and alignment when opt_metadata_thp is enabled @@ -56,16 +52,16 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { - addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, - &commit); + addr = ehooks_alloc( + tsdn, ehooks, NULL, size, alignment, &zero, &commit); } return addr; } static void -base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, - size_t size) { +base_unmap( + tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void 
*addr, size_t size) { /* * Cascade through dalloc, decommit, purge_forced, and purge_lazy, * stopping at first success. This cascade is performed for consistency @@ -109,8 +105,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, label_done: if (metadata_thp_madvise()) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (size & HUGEPAGE_MASK) == 0); + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 + && (size & HUGEPAGE_MASK) == 0); pages_nohuge(addr, size); } } @@ -126,8 +122,8 @@ base_edata_is_reused(edata_t *edata) { } static void -base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, - size_t size) { +base_edata_init( + size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) { size_t sn; sn = *extent_sn_next; @@ -174,9 +170,9 @@ huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { unsigned cnt = 0; edata_t *edata; - ql_foreach(edata, &pending_list->head, ql_link_active) { + ql_foreach (edata, &pending_list->head, ql_link_active) { assert(edata != NULL); - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t size = edata_size_get(edata); assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size && size != 0); @@ -196,11 +192,11 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { /* Called when adding a new block. 
*/ bool should_switch; if (base_ind_get(base) != 0) { - should_switch = (base_get_num_blocks(base, true) == - BASE_AUTO_THP_THRESHOLD); + should_switch = (base_get_num_blocks(base, true) + == BASE_AUTO_THP_THRESHOLD); } else { - should_switch = (base_get_num_blocks(base, true) == - BASE_AUTO_THP_THRESHOLD_A0); + should_switch = (base_get_num_blocks(base, true) + == BASE_AUTO_THP_THRESHOLD_A0); } if (!should_switch) { return; @@ -214,8 +210,9 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert((block->size & HUGEPAGE_MASK) == 0); pages_huge(block, block->size); if (config_stats) { - base->n_thp += HUGEPAGE_CEILING(block->size - - edata_bsize_get(&block->edata)) >> LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(block->size + - edata_bsize_get(&block->edata)) + >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -242,20 +239,22 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { } static void * -base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, - size_t alignment) { +base_extent_bump_alloc_helper( + edata_t *edata, size_t *gap_size, size_t size, size_t alignment) { void *ret; assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); assert(size == ALIGNMENT_CEILING(size, alignment)); - *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), - alignment) - (uintptr_t)edata_addr_get(edata); + *gap_size = ALIGNMENT_CEILING( + (uintptr_t)edata_addr_get(edata), alignment) + - (uintptr_t)edata_addr_get(edata); ret = (void *)((byte_t *)edata_addr_get(edata) + *gap_size); assert(edata_bsize_get(edata) >= *gap_size + size); - edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + - *gap_size + size), edata_bsize_get(edata) - *gap_size - size, - edata_sn_get(edata), base_edata_is_reused(edata)); + edata_binit(edata, + (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), + edata_bsize_get(edata) - *gap_size - size, edata_sn_get(edata), + base_edata_is_reused(edata)); return 
ret; } @@ -312,24 +311,26 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, edata_t *edata, * crossed by the new allocation. Adjust n_thp similarly when * metadata_thp is enabled. */ - base->resident += PAGE_CEILING((uintptr_t)addr + size) - - PAGE_CEILING((uintptr_t)addr - gap_size); + base->resident += PAGE_CEILING((uintptr_t)addr + size) + - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - if (metadata_thp_madvise() && (opt_metadata_thp == - metadata_thp_always || base->auto_thp_switched)) { + if (metadata_thp_madvise() + && (opt_metadata_thp == metadata_thp_always + || base->auto_thp_switched)) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> - LG_HUGEPAGE; + - HUGEPAGE_CEILING( + (uintptr_t)addr - gap_size)) + >> LG_HUGEPAGE; assert(base->mapped >= base->n_thp << LG_HUGEPAGE); } } } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, - size_t alignment) { - void *ret; +base_extent_bump_alloc( + tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, size_t alignment) { + void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); @@ -339,9 +340,9 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, static size_t base_block_size_ceil(size_t block_size) { - return opt_metadata_thp == metadata_thp_disabled ? - ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) : - HUGEPAGE_CEILING(block_size); + return opt_metadata_thp == metadata_thp_disabled + ? 
ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) + : HUGEPAGE_CEILING(block_size); } /* @@ -356,8 +357,8 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); size_t header_size = sizeof(base_block_t); - size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) - - header_size; + size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) + - header_size; /* * Create increasingly larger blocks in order to limit the total number * of disjoint virtual memory ranges. Choose the next size in the page @@ -365,27 +366,29 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * HUGEPAGE when using metadata_thp), or a size large enough to satisfy * the requested size and alignment, whichever is larger. */ - size_t min_block_size = base_block_size_ceil(sz_psz2u(header_size + - gap_size + usize)); - pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? - *pind_last + 1 : *pind_last; - size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); - size_t block_size = (min_block_size > next_block_size) ? min_block_size - : next_block_size; - base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, - block_size); + size_t min_block_size = base_block_size_ceil( + sz_psz2u(header_size + gap_size + usize)); + pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) + ? *pind_last + 1 + : *pind_last; + size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); + size_t block_size = (min_block_size > next_block_size) + ? 
min_block_size + : next_block_size; + base_block_t *block = (base_block_t *)base_map( + tsdn, ehooks, ind, block_size); if (block == NULL) { return NULL; } if (metadata_thp_madvise()) { void *addr = (void *)block; - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (block_size & HUGEPAGE_MASK) == 0); + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 + && (block_size & HUGEPAGE_MASK) == 0); if (opt_metadata_thp == metadata_thp_always) { pages_huge(addr, block_size); - } else if (opt_metadata_thp == metadata_thp_auto && - base != NULL) { + } else if (opt_metadata_thp == metadata_thp_auto + && base != NULL) { /* base != NULL indicates this is not a new base. */ malloc_mutex_lock(tsdn, &base->mtx); base_auto_thp_switch(tsdn, base); @@ -432,12 +435,12 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; - if (metadata_thp_madvise() && - !(opt_metadata_thp == metadata_thp_auto - && !base->auto_thp_switched)) { + if (metadata_thp_madvise() + && !(opt_metadata_thp == metadata_thp_auto + && !base->auto_thp_switched)) { assert(base->n_thp > 0); - base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> - LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE; } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); @@ -455,7 +458,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, bool metadata_use_hooks) { pszind_t pind_last = 0; - size_t extent_sn_next = 0; + size_t extent_sn_next = 0; /* * The base will contain the ehooks eventually, but it itself is @@ -463,9 +466,10 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, * memory, and then initialize the ehooks within the base_t. */ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, metadata_use_hooks ? 
- (extent_hooks_t *)extent_hooks : - (extent_hooks_t *)&ehooks_default_extent_hooks, ind); + ehooks_init(&fake_ehooks, + metadata_use_hooks ? (extent_hooks_t *)extent_hooks + : (extent_hooks_t *)&ehooks_default_extent_hooks, + ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -473,17 +477,18 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, return NULL; } - size_t gap_size; - size_t base_alignment = CACHELINE; - size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); - base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, - &gap_size, base_size, base_alignment); + size_t gap_size; + size_t base_alignment = CACHELINE; + size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); + base_t *base = (base_t *)base_extent_bump_alloc_helper( + &block->edata, &gap_size, base_size, base_alignment); ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); - ehooks_init(&base->ehooks_base, metadata_use_hooks ? - (extent_hooks_t *)extent_hooks : - (extent_hooks_t *)&ehooks_default_extent_hooks, ind); + ehooks_init(&base->ehooks_base, + metadata_use_hooks ? (extent_hooks_t *)extent_hooks + : (extent_hooks_t *)&ehooks_default_extent_hooks, + ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); return NULL; } @@ -502,9 +507,10 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; - base->n_thp = (opt_metadata_thp == metadata_thp_always) && - metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t)) - >> LG_HUGEPAGE : 0; + base->n_thp = (opt_metadata_thp == metadata_thp_always) + && metadata_thp_madvise() + ? 
HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE + : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); @@ -512,8 +518,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, /* Locking here is only necessary because of assertions. */ malloc_mutex_lock(tsdn, &base->mtx); - base_extent_bump_alloc_post(tsdn, base, &block->edata, gap_size, base, - base_size); + base_extent_bump_alloc_post( + tsdn, base, &block->edata, gap_size, base, base_size); malloc_mutex_unlock(tsdn, &base->mtx); return base; @@ -521,13 +527,13 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, void base_delete(tsdn_t *tsdn, base_t *base) { - ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); base_block_t *next = base->blocks; do { base_block_t *block = next; next = block->next; - base_unmap(tsdn, ehooks, base_ind_get(base), block, - block->size); + base_unmap( + tsdn, ehooks, base_ind_get(base), block, block->size); } while (next != NULL); } @@ -543,8 +549,8 @@ base_ehooks_get_for_metadata(base_t *base) { extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { - extent_hooks_t *old_extent_hooks = - ehooks_get_extent_hooks_ptr(&base->ehooks); + extent_hooks_t *old_extent_hooks = ehooks_get_extent_hooks_ptr( + &base->ehooks); ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks)); return old_extent_hooks; } @@ -602,9 +608,9 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { - size_t esn, usize; - edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), - EDATA_ALIGNMENT, &esn, &usize); + size_t esn, usize; + edata_t *edata = base_alloc_impl( + tsdn, base, sizeof(edata_t), EDATA_ALIGNMENT, &esn, &usize); if (edata == NULL) { return NULL; } @@ -618,8 +624,8 @@ 
base_alloc_edata(tsdn_t *tsdn, base_t *base) { void * base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { size_t usize; - void *rtree = base_alloc_impl(tsdn, base, size, CACHELINE, NULL, - &usize); + void *rtree = base_alloc_impl( + tsdn, base, size, CACHELINE, NULL, &usize); if (rtree == NULL) { return NULL; } @@ -632,8 +638,8 @@ base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { static inline void b0_alloc_header_size(size_t *header_size, size_t *alignment) { *alignment = QUANTUM; - *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM : - sizeof(edata_t *); + *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM + : sizeof(edata_t *); } /* @@ -645,7 +651,7 @@ b0_alloc_header_size(size_t *header_size, size_t *alignment) { */ void * b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { - base_t *base = b0get(); + base_t *base = b0get(); edata_t *edata = base_alloc_base_edata(tsdn, base); if (edata == NULL) { return NULL; @@ -662,8 +668,8 @@ b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { b0_alloc_header_size(&header_size, &alignment); size_t alloc_size = sz_s2u(stack_size + header_size); - void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn, - NULL); + void *addr = base_alloc_impl( + tsdn, base, alloc_size, alignment, &esn, NULL); if (addr == NULL) { edata_avail_insert(&base->edata_avail, edata); return NULL; @@ -683,8 +689,8 @@ b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { b0_alloc_header_size(&header_size, &alignment); edata_t *edata = *(edata_t **)((byte_t *)tcache_stack - header_size); - void *addr = edata_addr_get(edata); - size_t bsize = edata_bsize_get(edata); + void *addr = edata_addr_get(edata); + size_t bsize = edata_bsize_get(edata); /* Marked as "reused" to avoid double counting stats. 
*/ assert(base_edata_is_reused(edata)); assert(addr != NULL && bsize > 0); @@ -707,7 +713,8 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, malloc_mutex_lock(tsdn, &base->mtx); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - assert(base->edata_allocated + base->rtree_allocated <= base->allocated); + assert( + base->edata_allocated + base->rtree_allocated <= base->allocated); *allocated = base->allocated; *edata_allocated = base->edata_allocated; *rtree_allocated = base->rtree_allocated; diff --git a/src/batcher.c b/src/batcher.c index 2570b3a9..af71dae5 100644 --- a/src/batcher.c +++ b/src/batcher.c @@ -18,8 +18,8 @@ batcher_init(batcher_t *batcher, size_t nelems_max) { * Returns an index (into some user-owned array) to use for pushing, or * BATCHER_NO_IDX if no index is free. */ -size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, - size_t elems_to_push) { +size_t +batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) { assert(elems_to_push > 0); size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); if (nelems_guess + elems_to_push > batcher->nelems_max) { @@ -37,7 +37,8 @@ size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, * racing accesses of the batcher can fail fast instead of trying to * acquire a mutex only to discover that there's no space for them. 
*/ - atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + atomic_store_zu( + &batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); batcher->npushes++; return nelems; } @@ -75,7 +76,8 @@ batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { return nelems; } -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { +void +batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); malloc_mutex_unlock(tsdn, &batcher->mtx); } diff --git a/src/bin.c b/src/bin.c index 267aa0f3..98d1da02 100644 --- a/src/bin.c +++ b/src/bin.c @@ -10,8 +10,8 @@ unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; void (*bin_batching_test_after_push_hook)(size_t push_idx); void (*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); -void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, - bool list_empty); +void (*bin_batching_test_after_unlock_hook)( + unsigned slab_dalloc_count, bool list_empty); #endif bool @@ -49,7 +49,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { bool bin_init(bin_t *bin, unsigned binind) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } bin->slabcur = NULL; @@ -60,8 +60,8 @@ bin_init(bin_t *bin, unsigned binind) { } if (arena_bin_has_batch(binind)) { bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init(&batched_bin->remote_frees, - opt_bin_info_remote_free_max); + batcher_init( + &batched_bin->remote_frees, opt_bin_info_remote_free_max); } return false; } diff --git a/src/bin_info.c b/src/bin_info.c index f8a64ae3..de93418a 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -19,7 +19,7 @@ size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; bin_info_t bin_infos[SC_NBINS]; -szind_t bin_info_nbatched_sizes; +szind_t bin_info_nbatched_sizes; unsigned bin_info_nbatched_bins; unsigned bin_info_nunbatched_bins; @@ -28,12 
+28,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { for (unsigned i = 0; i < SC_NBINS; i++) { bin_info_t *bin_info = &infos[i]; - sc_t *sc = &sc_data->sc[i]; + sc_t *sc = &sc_data->sc[i]; bin_info->reg_size = ((size_t)1U << sc->lg_base) + ((size_t)sc->ndelta << sc->lg_delta); bin_info->slab_size = (sc->pgs << LG_PAGE); - bin_info->nregs = - (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->nregs = (uint32_t)(bin_info->slab_size + / bin_info->reg_size); bin_info->n_shards = bin_shard_sizes[i]; bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); diff --git a/src/bitmap.c b/src/bitmap.c index 0ccedc5d..8ac81a67 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -10,7 +10,7 @@ void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { unsigned i; - size_t group_count; + size_t group_count; assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); @@ -24,11 +24,11 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { group_count = BITMAP_BITS2GROUPS(nbits); for (i = 1; group_count > 1; i++) { assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; + binfo->levels[i].group_offset = + binfo->levels[i - 1].group_offset + group_count; group_count = BITMAP_BITS2GROUPS(group_count); } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + binfo->levels[i].group_offset = binfo->levels[i - 1].group_offset + group_count; assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); binfo->nlevels = i; @@ -42,7 +42,7 @@ bitmap_info_ngroups(const bitmap_info_t *binfo) { void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { - size_t extra; + size_t extra; unsigned i; /* @@ -69,12 +69,13 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { bitmap[binfo->levels[1].group_offset - 1] >>= extra; } for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = 
binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + size_t group_count = binfo->levels[i].group_offset + - binfo->levels[i - 1].group_offset; + extra = (BITMAP_GROUP_NBITS + - (group_count & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; if (extra != 0) { - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + bitmap[binfo->levels[i + 1].group_offset - 1] >>= extra; } } } diff --git a/src/buf_writer.c b/src/buf_writer.c index 7c6f7940..3c298502 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -43,8 +43,9 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, if (write_cb != NULL) { buf_writer->write_cb = write_cb; } else { - buf_writer->write_cb = je_malloc_message != NULL ? - je_malloc_message : wrtmessage; + buf_writer->write_cb = je_malloc_message != NULL + ? je_malloc_message + : wrtmessage; } buf_writer->cbopaque = cbopaque; assert(buf_len >= 2); @@ -52,8 +53,8 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, buf_writer->buf = buf; buf_writer->internal_buf = false; } else { - buf_writer->buf = buf_writer_allocate_internal_buf(tsdn, - buf_len); + buf_writer->buf = buf_writer_allocate_internal_buf( + tsdn, buf_len); buf_writer->internal_buf = true; } if (buf_writer->buf != NULL) { @@ -111,13 +112,13 @@ buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { } void -buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, - void *read_cbopaque) { +buf_writer_pipe( + buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque) { /* * A tiny local buffer in case the buffered writer failed to allocate * at init. 
*/ - static char backup_buf[16]; + static char backup_buf[16]; static buf_writer_t backup_buf_writer; buf_writer_assert(buf_writer); diff --git a/src/cache_bin.c b/src/cache_bin.c index 2f5afeb9..ec677948 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -8,8 +8,7 @@ const uintptr_t disabled_bin = JUNK_ADDR; void -cache_bin_info_init(cache_bin_info_t *info, - cache_bin_sz_t ncached_max) { +cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); @@ -51,27 +50,26 @@ cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, } void -cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, - size_t *cur_offset) { +cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset) { if (config_debug) { size_t computed_size; size_t computed_alignment; /* Pointer should be as aligned as we asked for. */ - cache_bin_info_compute_alloc(infos, ninfos, &computed_size, - &computed_alignment); + cache_bin_info_compute_alloc( + infos, ninfos, &computed_size, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - *(uintptr_t *)((byte_t *)alloc + *cur_offset) = - cache_bin_preceding_junk; + *(uintptr_t *)((byte_t *)alloc + + *cur_offset) = cache_bin_preceding_junk; *cur_offset += sizeof(void *); } void cache_bin_postincrement(void *alloc, size_t *cur_offset) { - *(uintptr_t *)((byte_t *)alloc + *cur_offset) = - cache_bin_trailing_junk; + *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); } @@ -83,8 +81,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, * will access the slots toward higher addresses (for the benefit of * adjacent prefetch). 
*/ - void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); - void *full_position = stack_cur; + void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); + void *full_position = stack_cur; cache_bin_sz_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; @@ -96,8 +94,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, bin->low_bits_full = (cache_bin_sz_t)(uintptr_t)full_position; bin->low_bits_empty = (cache_bin_sz_t)(uintptr_t)empty_position; cache_bin_info_init(&bin->bin_info, info->ncached_max); - cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); + cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, + (cache_bin_sz_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { assert(cache_bin_ncached_get_local(bin) == 0); @@ -109,8 +107,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) { - const void *fake_stack = cache_bin_disabled_bin_stack(); - size_t fake_offset = 0; + const void *fake_stack = cache_bin_disabled_bin_stack(); + size_t fake_offset = 0; cache_bin_info_t fake_info; cache_bin_info_init(&fake_info, 0); cache_bin_init(bin, &fake_info, (void *)fake_stack, &fake_offset); diff --git a/src/ckh.c b/src/ckh.c index 8db4319c..80688162 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -49,8 +49,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); -static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -60,7 +60,7 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); */ static size_t ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { - ckhc_t *cell; + ckhc_t *cell; unsigned i; for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { @@ -98,20 +98,20 @@ ckh_isearch(ckh_t *ckh, const void *key) { } static bool -ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, - const void *data) { - ckhc_t *cell; +ckh_try_bucket_insert( + ckh_t *ckh, size_t bucket, const void *key, const void *data) { + ckhc_t *cell; unsigned offset, i; /* * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range_u64( + &ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + - ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; if (cell->key == NULL) { cell->key = key; cell->data = data; @@ -130,12 +130,12 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * eviction/relocation bucket cycle. 
*/ static bool -ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, - void const **argdata) { +ckh_evict_reloc_insert( + ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) { const void *key, *data, *tkey, *tdata; - ckhc_t *cell; - size_t hashes[2], bucket, tbucket; - unsigned i; + ckhc_t *cell; + size_t hashes[2], bucket, tbucket; + unsigned i; bucket = argbucket; key = *argkey; @@ -149,15 +149,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + i = (unsigned)prng_lg_range_u64( + &ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); /* Swap cell->{key,data} and {key,data} (evict). */ - tkey = cell->key; tdata = cell->data; - cell->key = key; cell->data = data; - key = tkey; data = tdata; + tkey = cell->key; + tdata = cell->data; + cell->key = key; + cell->data = data; + key = tkey; + data = tdata; #ifdef CKH_COUNT ckh->nrelocs++; @@ -167,8 +170,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, ckh->hash(key, hashes); tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (tbucket == bucket) { - tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - - 1); + tbucket = hashes[0] + & ((ZU(1) << ckh->lg_curbuckets) - 1); /* * It may be that (tbucket == bucket) still, if the * item's hashes both indicate this bucket. 
However, @@ -201,8 +204,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } static bool -ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { - size_t hashes[2], bucket; +ckh_try_insert(ckh_t *ckh, void const **argkey, void const **argdata) { + size_t hashes[2], bucket; const void *key = *argkey; const void *data = *argdata; @@ -232,7 +235,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { */ static bool ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { - size_t count, i, nins; + size_t count, i, nins; const void *key, *data; count = ckh->count; @@ -254,8 +257,8 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { static bool ckh_grow(tsd_t *tsd, ckh_t *ckh) { - bool ret; - ckhc_t *tab, *ttab; + bool ret; + ckhc_t *tab, *ttab; unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT @@ -274,8 +277,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { lg_curcells++; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } @@ -309,8 +311,8 @@ label_return: static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { - ckhc_t *tab, *ttab; - size_t usize; + ckhc_t *tab, *ttab; + size_t usize; unsigned lg_prevbuckets, lg_curcells; /* @@ -358,8 +360,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, ckh_keycomp_t *keycomp) { - bool ret; - size_t mincells, usize; + bool ret; + size_t mincells, usize; unsigned lg_mincells; assert(minitems > 0); @@ -386,8 +388,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, assert(LG_CKH_BUCKET_CELLS > 0); mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; for (lg_mincells = LG_CKH_BUCKET_CELLS; - (ZU(1) << lg_mincells) < mincells; - lg_mincells++) { + (ZU(1) << lg_mincells) < mincells; lg_mincells++) { /* Do nothing. 
*/ } ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; @@ -417,11 +418,12 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); #ifdef CKH_VERBOSE - malloc_printf( - "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64"," - " nshrinkfails: %"FMTu64", ninserts: %"FMTu64"," - " nrelocs: %"FMTu64"\n", __func__, ckh, - (unsigned long long)ckh->ngrows, + malloc_printf("%s(%p): ngrows: %" FMTu64 ", nshrinks: %" FMTu64 + "," + " nshrinkfails: %" FMTu64 ", ninserts: %" FMTu64 + "," + " nrelocs: %" FMTu64 "\n", + __func__, ckh, (unsigned long long)ckh->ngrows, (unsigned long long)ckh->nshrinks, (unsigned long long)ckh->nshrinkfails, (unsigned long long)ckh->ninserts, @@ -445,8 +447,9 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) { size_t i, ncells; - for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + - LG_CKH_BUCKET_CELLS)); i < ncells; i++) { + for (i = *tabind, + ncells = (ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS)); + i < ncells; i++) { if (ckh->tab[i].key != NULL) { if (key != NULL) { *key = (void *)ckh->tab[i].key; @@ -486,8 +489,8 @@ label_return: } bool -ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data) { +ckh_remove( + tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; assert(ckh != NULL); @@ -505,9 +508,9 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, ckh->count--; /* Try to halve the table if it is less than 1/4 full. */ - if (ckh->count < (ZU(1) << (ckh->lg_curbuckets - + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets - > ckh->lg_minbuckets) { + if (ckh->count < (ZU(1) + << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 2)) + && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ ckh_shrink(tsd, ckh); } @@ -554,8 +557,8 @@ ckh_string_keycomp(const void *k1, const void *k2) { void ckh_pointer_hash(const void *key, size_t r_hash[2]) { union { - const void *v; - size_t i; + const void *v; + size_t i; } u; assert(sizeof(u.v) == sizeof(u.i)); diff --git a/src/counter.c b/src/counter.c index 8f1ae3af..8257a062 100644 --- a/src/counter.c +++ b/src/counter.c @@ -6,7 +6,7 @@ bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { if (LOCKEDINT_MTX_INIT(counter->mtx, "counter_accum", - WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { return true; } locked_init_u64_unsynchronized(&counter->accumbytes, 0); diff --git a/src/ctl.c b/src/ctl.c index 4f06363a..9e9a4b43 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -25,10 +25,10 @@ * ctl_mtx protects the following: * - ctl_stats->* */ -static malloc_mutex_t ctl_mtx; -static bool ctl_initialized; -static ctl_stats_t *ctl_stats; -static ctl_arenas_t *ctl_arenas; +static malloc_mutex_t ctl_mtx; +static bool ctl_initialized; +static ctl_stats_t *ctl_stats; +static ctl_arenas_t *ctl_arenas; /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -53,13 +53,13 @@ ctl_indexed_node(const ctl_node_t *node) { /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -#define CTL_PROTO(n) \ -static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen); +#define CTL_PROTO(n) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen); -#define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ - const size_t *mib, size_t miblen, size_t i); +#define INDEX_PROTO(n) \ + static const ctl_named_node_t *n##_index( \ + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i); CTL_PROTO(version) CTL_PROTO(epoch) @@ -374,14 +374,14 @@ CTL_PROTO(experimental_prof_recent_alloc_dump) CTL_PROTO(experimental_batch_alloc) CTL_PROTO(experimental_arenas_create_ext) -#define MUTEX_STATS_CTL_PROTO_GEN(n) \ -CTL_PROTO(stats_##n##_num_ops) \ -CTL_PROTO(stats_##n##_num_wait) \ -CTL_PROTO(stats_##n##_num_spin_acq) \ -CTL_PROTO(stats_##n##_num_owner_switch) \ -CTL_PROTO(stats_##n##_total_wait_time) \ -CTL_PROTO(stats_##n##_max_wait_time) \ -CTL_PROTO(stats_##n##_max_num_thds) +#define MUTEX_STATS_CTL_PROTO_GEN(n) \ + CTL_PROTO(stats_##n##_num_ops) \ + CTL_PROTO(stats_##n##_num_wait) \ + CTL_PROTO(stats_##n##_num_spin_acq) \ + CTL_PROTO(stats_##n##_num_owner_switch) \ + CTL_PROTO(stats_##n##_total_wait_time) \ + CTL_PROTO(stats_##n##_max_wait_time) \ + CTL_PROTO(stats_##n##_max_num_thds) /* Global mutexes. */ #define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx) @@ -402,542 +402,448 @@ CTL_PROTO(stats_mutexes_reset) /******************************************************************************/ /* mallctl tree. 
*/ -#define NAME(n) {true}, n -#define CHILD(t, c) \ - sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ - (ctl_node_t *)c##_node, \ - NULL -#define CTL(c) 0, NULL, c##_ctl +#define NAME(n) {true}, n +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), (ctl_node_t *)c##_node, \ + NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. */ -#define INDEX(i) {false}, i##_index +#define INDEX(i) {false}, i##_index -static const ctl_named_node_t thread_tcache_ncached_max_node[] = { - {NAME("read_sizeclass"), - CTL(thread_tcache_ncached_max_read_sizeclass)}, - {NAME("write"), CTL(thread_tcache_ncached_max_write)} +static const ctl_named_node_t thread_tcache_ncached_max_node[] = { + {NAME("read_sizeclass"), CTL(thread_tcache_ncached_max_read_sizeclass)}, + {NAME("write"), CTL(thread_tcache_ncached_max_write)}}; + +static const ctl_named_node_t thread_tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("max"), CTL(thread_tcache_max)}, + {NAME("flush"), CTL(thread_tcache_flush)}, + {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)}}; + +static const ctl_named_node_t thread_peak_node[] = { + {NAME("read"), CTL(thread_peak_read)}, + {NAME("reset"), CTL(thread_peak_reset)}, }; -static const ctl_named_node_t thread_tcache_node[] = { - {NAME("enabled"), CTL(thread_tcache_enabled)}, - {NAME("max"), CTL(thread_tcache_max)}, - {NAME("flush"), CTL(thread_tcache_flush)}, - {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)} -}; +static const ctl_named_node_t thread_prof_node[] = { + {NAME("name"), CTL(thread_prof_name)}, + {NAME("active"), CTL(thread_prof_active)}}; -static const ctl_named_node_t thread_peak_node[] = { - {NAME("read"), CTL(thread_peak_read)}, - {NAME("reset"), CTL(thread_peak_reset)}, -}; +static const ctl_named_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)}, + {NAME("allocated"), CTL(thread_allocated)}, + 
{NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("peak"), CHILD(named, thread_peak)}, + {NAME("prof"), CHILD(named, thread_prof)}, + {NAME("idle"), CTL(thread_idle)}}; -static const ctl_named_node_t thread_prof_node[] = { - {NAME("name"), CTL(thread_prof_name)}, - {NAME("active"), CTL(thread_prof_active)} -}; - -static const ctl_named_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)}, - {NAME("allocated"), CTL(thread_allocated)}, - {NAME("allocatedp"), CTL(thread_allocatedp)}, - {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(named, thread_tcache)}, - {NAME("peak"), CHILD(named, thread_peak)}, - {NAME("prof"), CHILD(named, thread_prof)}, - {NAME("idle"), CTL(thread_idle)} -}; - -static const ctl_named_node_t config_node[] = { - {NAME("cache_oblivious"), CTL(config_cache_oblivious)}, - {NAME("debug"), CTL(config_debug)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("malloc_conf"), CTL(config_malloc_conf)}, - {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("utrace"), CTL(config_utrace)}, - {NAME("xmalloc"), CTL(config_xmalloc)} -}; +static const ctl_named_node_t config_node[] = { + {NAME("cache_oblivious"), CTL(config_cache_oblivious)}, + {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, + {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), 
CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, + {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)}, + {NAME("xmalloc"), CTL(config_xmalloc)}}; static const ctl_named_node_t opt_malloc_conf_node[] = { - {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, - {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, - {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, - {NAME("global_var_2_conf_harder"), - CTL(opt_malloc_conf_global_var_2_conf_harder)} -}; + {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, + {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, + {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, + {NAME("global_var_2_conf_harder"), + CTL(opt_malloc_conf_global_var_2_conf_harder)}}; -static const ctl_named_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, - {NAME("abort_conf"), CTL(opt_abort_conf)}, - {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, - {NAME("trust_madvise"), CTL(opt_trust_madvise)}, - {NAME("confirm_conf"), CTL(opt_confirm_conf)}, - {NAME("hpa"), CTL(opt_hpa)}, - {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, - {NAME("hpa_hugification_threshold"), - CTL(opt_hpa_hugification_threshold)}, - {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, - {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, - {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("experimental_hpa_max_purge_nhp"), - CTL(opt_experimental_hpa_max_purge_nhp)}, - {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, - {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, - {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, - {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_bytes_after_flush"), - CTL(opt_hpa_sec_bytes_after_flush)}, - {NAME("hpa_sec_batch_fill_extra"), - CTL(opt_hpa_sec_batch_fill_extra)}, - {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, - 
{NAME("metadata_thp"), CTL(opt_metadata_thp)}, - {NAME("retain"), CTL(opt_retain)}, - {NAME("dss"), CTL(opt_dss)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, - {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, - {NAME("background_thread"), CTL(opt_background_thread)}, - {NAME("max_background_threads"), CTL(opt_max_background_threads)}, - {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, - {NAME("stats_interval"), CTL(opt_stats_interval)}, - {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)}, - {NAME("utrace"), CTL(opt_utrace)}, - {NAME("xmalloc"), CTL(opt_xmalloc)}, - {NAME("experimental_infallible_new"), - CTL(opt_experimental_infallible_new)}, - {NAME("experimental_tcache_gc"), - CTL(opt_experimental_tcache_gc)}, - {NAME("max_batched_size"), CTL(opt_max_batched_size)}, - {NAME("remote_free_max"), CTL(opt_remote_free_max)}, - {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("tcache_max"), CTL(opt_tcache_max)}, - {NAME("tcache_nslots_small_min"), - CTL(opt_tcache_nslots_small_min)}, - {NAME("tcache_nslots_small_max"), - CTL(opt_tcache_nslots_small_max)}, - {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, - {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, - {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, - {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, - {NAME("lg_tcache_flush_small_div"), - CTL(opt_lg_tcache_flush_small_div)}, - {NAME("lg_tcache_flush_large_div"), - CTL(opt_lg_tcache_flush_large_div)}, - {NAME("thp"), CTL(opt_thp)}, - {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, - 
{NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, - {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, - {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("experimental_lg_prof_threshold"), CTL(opt_experimental_lg_prof_threshold)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_final"), CTL(opt_prof_final)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, - {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, - {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, - {NAME("prof_stats"), CTL(opt_prof_stats)}, - {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, - {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, - {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)}, - {NAME("debug_double_free_max_scan"), - CTL(opt_debug_double_free_max_scan)}, - {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, - {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, - {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} -}; +static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, + {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, + {NAME("trust_madvise"), CTL(opt_trust_madvise)}, + {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, + {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, + {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, + {NAME("hpa_min_purge_interval_ms"), 
CTL(opt_hpa_min_purge_interval_ms)}, + {NAME("experimental_hpa_max_purge_nhp"), + CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, + {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, + {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, + {NAME("hpa_sec_bytes_after_flush"), CTL(opt_hpa_sec_bytes_after_flush)}, + {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, + {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, + {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, + {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, + {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, + {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, + {NAME("stats_interval"), CTL(opt_stats_interval)}, + {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, + {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)}, + {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max"), CTL(opt_remote_free_max)}, + {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("tcache_max"), CTL(opt_tcache_max)}, + {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, + {NAME("tcache_nslots_small_max"), 
CTL(opt_tcache_nslots_small_max)}, + {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, + {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, + {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, + {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, + {NAME("lg_tcache_flush_small_div"), CTL(opt_lg_tcache_flush_small_div)}, + {NAME("lg_tcache_flush_large_div"), CTL(opt_lg_tcache_flush_large_div)}, + {NAME("thp"), CTL(opt_thp)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, + {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, + {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("experimental_lg_prof_threshold"), + CTL(opt_experimental_lg_prof_threshold)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_stats"), CTL(opt_prof_stats)}, + {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, + {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, + {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, + {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)}}; -static const ctl_named_node_t tcache_node[] = { - {NAME("create"), CTL(tcache_create)}, 
- {NAME("flush"), CTL(tcache_flush)}, - {NAME("destroy"), CTL(tcache_destroy)} -}; +static const ctl_named_node_t tcache_node[] = { + {NAME("create"), CTL(tcache_create)}, {NAME("flush"), CTL(tcache_flush)}, + {NAME("destroy"), CTL(tcache_destroy)}}; static const ctl_named_node_t arena_i_node[] = { - {NAME("initialized"), CTL(arena_i_initialized)}, - {NAME("decay"), CTL(arena_i_decay)}, - {NAME("purge"), CTL(arena_i_purge)}, - {NAME("reset"), CTL(arena_i_reset)}, - {NAME("destroy"), CTL(arena_i_destroy)}, - {NAME("dss"), CTL(arena_i_dss)}, - /* + {NAME("initialized"), CTL(arena_i_initialized)}, + {NAME("decay"), CTL(arena_i_decay)}, {NAME("purge"), CTL(arena_i_purge)}, + {NAME("reset"), CTL(arena_i_reset)}, + {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, + /* * Undocumented for now, since we anticipate an arena API in flux after * we cut the last 5-series release. */ - {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, - {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, - {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, - {NAME("name"), CTL(arena_i_name)} -}; + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, + {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, + {NAME("name"), CTL(arena_i_name)}}; static const ctl_named_node_t super_arena_i_node[] = { - {NAME(""), CHILD(named, arena_i)} -}; + {NAME(""), CHILD(named, arena_i)}}; -static const ctl_indexed_node_t arena_node[] = { - {INDEX(arena_i)} -}; +static const ctl_indexed_node_t arena_node[] = {{INDEX(arena_i)}}; static const ctl_named_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - 
{NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, - {NAME("nshards"), CTL(arenas_bin_i_nshards)} -}; + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, + {NAME("nshards"), CTL(arenas_bin_i_nshards)}}; static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(named, arenas_bin_i)} -}; + {NAME(""), CHILD(named, arenas_bin_i)}}; -static const ctl_indexed_node_t arenas_bin_node[] = { - {INDEX(arenas_bin_i)} -}; +static const ctl_indexed_node_t arenas_bin_node[] = {{INDEX(arenas_bin_i)}}; static const ctl_named_node_t arenas_lextent_i_node[] = { - {NAME("size"), CTL(arenas_lextent_i_size)} -}; + {NAME("size"), CTL(arenas_lextent_i_size)}}; static const ctl_named_node_t super_arenas_lextent_i_node[] = { - {NAME(""), CHILD(named, arenas_lextent_i)} -}; + {NAME(""), CHILD(named, arenas_lextent_i)}}; static const ctl_indexed_node_t arenas_lextent_node[] = { - {INDEX(arenas_lextent_i)} -}; + {INDEX(arenas_lextent_i)}}; static const ctl_named_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("page"), CTL(arenas_page)}, - {NAME("hugepage"), CTL(arenas_hugepage)}, - {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("nbins"), CTL(arenas_nbins)}, - {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlextents"), CTL(arenas_nlextents)}, - {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)}, - {NAME("lookup"), CTL(arenas_lookup)} -}; + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, + {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, + {NAME("hugepage"), CTL(arenas_hugepage)}, + 
{NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, + {NAME("nlextents"), CTL(arenas_nlextents)}, + {NAME("lextent"), CHILD(indexed, arenas_lextent)}, + {NAME("create"), CTL(arenas_create)}, {NAME("lookup"), CTL(arenas_lookup)}}; static const ctl_named_node_t prof_stats_bins_i_node[] = { - {NAME("live"), CTL(prof_stats_bins_i_live)}, - {NAME("accum"), CTL(prof_stats_bins_i_accum)} -}; + {NAME("live"), CTL(prof_stats_bins_i_live)}, + {NAME("accum"), CTL(prof_stats_bins_i_accum)}}; static const ctl_named_node_t super_prof_stats_bins_i_node[] = { - {NAME(""), CHILD(named, prof_stats_bins_i)} -}; + {NAME(""), CHILD(named, prof_stats_bins_i)}}; static const ctl_indexed_node_t prof_stats_bins_node[] = { - {INDEX(prof_stats_bins_i)} -}; + {INDEX(prof_stats_bins_i)}}; static const ctl_named_node_t prof_stats_lextents_i_node[] = { - {NAME("live"), CTL(prof_stats_lextents_i_live)}, - {NAME("accum"), CTL(prof_stats_lextents_i_accum)} -}; + {NAME("live"), CTL(prof_stats_lextents_i_live)}, + {NAME("accum"), CTL(prof_stats_lextents_i_accum)}}; static const ctl_named_node_t super_prof_stats_lextents_i_node[] = { - {NAME(""), CHILD(named, prof_stats_lextents_i)} -}; + {NAME(""), CHILD(named, prof_stats_lextents_i)}}; static const ctl_indexed_node_t prof_stats_lextents_node[] = { - {INDEX(prof_stats_lextents_i)} + {INDEX(prof_stats_lextents_i)}}; + +static const ctl_named_node_t prof_stats_node[] = { + {NAME("bins"), CHILD(indexed, prof_stats_bins)}, + {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, }; -static const ctl_named_node_t prof_stats_node[] = { - {NAME("bins"), CHILD(indexed, prof_stats_bins)}, - {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, -}; - -static const ctl_named_node_t prof_node[] = { - {NAME("thread_active_init"), CTL(prof_thread_active_init)}, - {NAME("active"), CTL(prof_active)}, - {NAME("dump"), CTL(prof_dump)}, - 
{NAME("gdump"), CTL(prof_gdump)}, - {NAME("prefix"), CTL(prof_prefix)}, - {NAME("reset"), CTL(prof_reset)}, - {NAME("interval"), CTL(prof_interval)}, - {NAME("lg_sample"), CTL(lg_prof_sample)}, - {NAME("log_start"), CTL(prof_log_start)}, - {NAME("log_stop"), CTL(prof_log_stop)}, - {NAME("stats"), CHILD(named, prof_stats)} -}; +static const ctl_named_node_t prof_node[] = { + {NAME("thread_active_init"), CTL(prof_thread_active_init)}, + {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, + {NAME("gdump"), CTL(prof_gdump)}, {NAME("prefix"), CTL(prof_prefix)}, + {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, + {NAME("lg_sample"), CTL(lg_prof_sample)}, + {NAME("log_start"), CTL(prof_log_start)}, + {NAME("log_stop"), CTL(prof_log_stop)}, + {NAME("stats"), CHILD(named, prof_stats)}}; static const ctl_named_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} -}; + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)}}; static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} -}; + 
{NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)}}; -#define MUTEX_PROF_DATA_NODE(prefix) \ -static const ctl_named_node_t stats_##prefix##_node[] = { \ - {NAME("num_ops"), \ - CTL(stats_##prefix##_num_ops)}, \ - {NAME("num_wait"), \ - CTL(stats_##prefix##_num_wait)}, \ - {NAME("num_spin_acq"), \ - CTL(stats_##prefix##_num_spin_acq)}, \ - {NAME("num_owner_switch"), \ - CTL(stats_##prefix##_num_owner_switch)}, \ - {NAME("total_wait_time"), \ - CTL(stats_##prefix##_total_wait_time)}, \ - {NAME("max_wait_time"), \ - CTL(stats_##prefix##_max_wait_time)}, \ - {NAME("max_num_thds"), \ - CTL(stats_##prefix##_max_num_thds)} \ - /* Note that # of current waiting thread not provided. */ \ -}; +#define MUTEX_PROF_DATA_NODE(prefix) \ + static const ctl_named_node_t stats_##prefix##_node[] = { \ + {NAME("num_ops"), CTL(stats_##prefix##_num_ops)}, \ + {NAME("num_wait"), CTL(stats_##prefix##_num_wait)}, \ + {NAME("num_spin_acq"), CTL(stats_##prefix##_num_spin_acq)}, \ + {NAME("num_owner_switch"), \ + CTL(stats_##prefix##_num_owner_switch)}, \ + {NAME("total_wait_time"), CTL(stats_##prefix##_total_wait_time)}, \ + {NAME("max_wait_time"), CTL(stats_##prefix##_max_wait_time)}, \ + {NAME("max_num_thds"), \ + CTL(stats_##prefix##_max_num_thds)} /* Note that # of current waiting thread not provided. 
*/ \ + }; MUTEX_PROF_DATA_NODE(arenas_i_bins_j_mutex) static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, - {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, - {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, - {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, - {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, - {NAME("batch_pops"), - CTL(stats_arenas_i_bins_j_batch_pops)}, - {NAME("batch_failed_pushes"), - CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, - {NAME("batch_pushes"), - CTL(stats_arenas_i_bins_j_batch_pushes)}, - {NAME("batch_pushed_elems"), - CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, - {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} -}; + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, + {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, + {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, + {NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)}, + {NAME("batch_failed_pushes"), + CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, + {NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)}, + {NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, + {NAME("mutex"), CHILD(named, 
stats_arenas_i_bins_j_mutex)}}; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_bins_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_bins_j)}}; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { - {INDEX(stats_arenas_i_bins_j)} -}; + {INDEX(stats_arenas_i_bins_j)}}; static const ctl_named_node_t stats_arenas_i_lextents_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)}, - {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)} -}; + {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)}, + {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)}}; static const ctl_named_node_t super_stats_arenas_i_lextents_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lextents_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_lextents_j)}}; static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { - {INDEX(stats_arenas_i_lextents_j)} -}; + {INDEX(stats_arenas_i_lextents_j)}}; static const ctl_named_node_t stats_arenas_i_extents_j_node[] = { - {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, - {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, - {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, - {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, - {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)}, - {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)} -}; + {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, + {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, + {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, + {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, + {NAME("muzzy_bytes"), 
CTL(stats_arenas_i_extents_j_muzzy_bytes)}, + {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}}; static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_extents_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_extents_j)}}; static const ctl_indexed_node_t stats_arenas_i_extents_node[] = { - {INDEX(stats_arenas_i_extents_j)} -}; + {INDEX(stats_arenas_i_extents_j)}}; -#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) +#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) MUTEX_PROF_ARENA_MUTEXES #undef OP static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)}, -MUTEX_PROF_ARENA_MUTEXES + MUTEX_PROF_ARENA_MUTEXES #undef OP }; static const ctl_named_node_t stats_arenas_i_hpa_shard_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)}}; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - 
CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)}}; static const ctl_named_node_t stats_arenas_i_hpa_shard_empty_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, + 
{NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)}}; -static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)} -}; +static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = + {{NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)}}; -static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME(""), - CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)} -}; +static const ctl_named_node_t + super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)}}; static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = -{ - {INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)} -}; + {{INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)}}; static const ctl_named_node_t 
stats_arenas_i_hpa_shard_node[] = { - {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, - {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, - {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, + {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, + {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, + {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, - {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, + {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, - {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, - {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, - {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, - {NAME("nhugify_failures"), - CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, - {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, + {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, + {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, - {NAME("full_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_full_slabs)}, - {NAME("empty_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_empty_slabs)}, - {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)} -}; + {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)}, + {NAME("nonfull_slabs"), + CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}}; static const ctl_named_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("uptime"), CTL(stats_arenas_i_uptime)}, - {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), 
CTL(stats_arenas_i_muzzy_decay_ms)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, - {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("retained"), CTL(stats_arenas_i_retained)}, - {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, - {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, - {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, - {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, - {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)}, - {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)}, - {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, - {NAME("base"), CTL(stats_arenas_i_base)}, - {NAME("internal"), CTL(stats_arenas_i_internal)}, - {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, - {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, - {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, - {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, - {NAME("tcache_stashed_bytes"), - CTL(stats_arenas_i_tcache_stashed_bytes)}, - {NAME("resident"), CTL(stats_arenas_i_resident)}, - {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, - {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, - {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, - {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, - {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, - {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)} -}; + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("uptime"), CTL(stats_arenas_i_uptime)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)}, + 
{NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("retained"), CTL(stats_arenas_i_retained)}, + {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, + {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, + {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, + {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, + {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)}, + {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)}, + {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, + {NAME("base"), CTL(stats_arenas_i_base)}, + {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, + {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, + {NAME("tcache_stashed_bytes"), CTL(stats_arenas_i_tcache_stashed_bytes)}, + {NAME("resident"), CTL(stats_arenas_i_resident)}, + {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, + {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, + {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, + {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)}}; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(named, stats_arenas_i)} -}; + {NAME(""), CHILD(named, stats_arenas_i)}}; -static const ctl_indexed_node_t stats_arenas_node[] = { - {INDEX(stats_arenas_i)} -}; +static const ctl_indexed_node_t stats_arenas_node[] = 
{{INDEX(stats_arenas_i)}}; static const ctl_named_node_t stats_background_thread_node[] = { - {NAME("num_threads"), CTL(stats_background_thread_num_threads)}, - {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, - {NAME("run_interval"), CTL(stats_background_thread_run_interval)} -}; + {NAME("num_threads"), CTL(stats_background_thread_num_threads)}, + {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, + {NAME("run_interval"), CTL(stats_background_thread_run_interval)}}; #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) MUTEX_PROF_GLOBAL_MUTEXES @@ -945,95 +851,81 @@ MUTEX_PROF_GLOBAL_MUTEXES static const ctl_named_node_t stats_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)}, -MUTEX_PROF_GLOBAL_MUTEXES + MUTEX_PROF_GLOBAL_MUTEXES #undef OP - {NAME("reset"), CTL(stats_mutexes_reset)} -}; + {NAME("reset"), CTL(stats_mutexes_reset)}}; #undef MUTEX_PROF_DATA_NODE static const ctl_named_node_t stats_node[] = { - {NAME("allocated"), CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("metadata"), CTL(stats_metadata)}, - {NAME("metadata_edata"), CTL(stats_metadata_edata)}, - {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, - {NAME("metadata_thp"), CTL(stats_metadata_thp)}, - {NAME("resident"), CTL(stats_resident)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("retained"), CTL(stats_retained)}, - {NAME("background_thread"), - CHILD(named, stats_background_thread)}, - {NAME("mutexes"), CHILD(named, stats_mutexes)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)}, - {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_edata"), CTL(stats_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, + {NAME("resident"), CTL(stats_resident)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("retained"), 
CTL(stats_retained)}, + {NAME("background_thread"), CHILD(named, stats_background_thread)}, + {NAME("mutexes"), CHILD(named, stats_mutexes)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)}, + {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, }; static const ctl_named_node_t experimental_hooks_node[] = { - {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)}, - {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, - {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, - {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, - {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, - {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, - {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, - {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, + {NAME("install"), CTL(experimental_hooks_install)}, + {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, + {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, + {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, + {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, + {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, + {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; static const ctl_named_node_t experimental_thread_node[] = { - {NAME("activity_callback"), - CTL(experimental_thread_activity_callback)} -}; + {NAME("activity_callback"), CTL(experimental_thread_activity_callback)}}; static const ctl_named_node_t experimental_utilization_node[] = { - {NAME("query"), CTL(experimental_utilization_query)}, - {NAME("batch_query"), CTL(experimental_utilization_batch_query)} -}; + {NAME("query"), CTL(experimental_utilization_query)}, + {NAME("batch_query"), CTL(experimental_utilization_batch_query)}}; 
static const ctl_named_node_t experimental_arenas_i_node[] = { - {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)} -}; + {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)}}; static const ctl_named_node_t super_experimental_arenas_i_node[] = { - {NAME(""), CHILD(named, experimental_arenas_i)} -}; + {NAME(""), CHILD(named, experimental_arenas_i)}}; static const ctl_indexed_node_t experimental_arenas_node[] = { - {INDEX(experimental_arenas_i)} -}; + {INDEX(experimental_arenas_i)}}; static const ctl_named_node_t experimental_prof_recent_node[] = { - {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, - {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, + {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, + {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, }; static const ctl_named_node_t experimental_node[] = { - {NAME("hooks"), CHILD(named, experimental_hooks)}, - {NAME("utilization"), CHILD(named, experimental_utilization)}, - {NAME("arenas"), CHILD(indexed, experimental_arenas)}, - {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, - {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, - {NAME("thread"), CHILD(named, experimental_thread)} -}; + {NAME("hooks"), CHILD(named, experimental_hooks)}, + {NAME("utilization"), CHILD(named, experimental_utilization)}, + {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, + {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, + {NAME("thread"), CHILD(named, experimental_thread)}}; -static const ctl_named_node_t root_node[] = { - {NAME("version"), CTL(version)}, - {NAME("epoch"), CTL(epoch)}, - {NAME("background_thread"), CTL(background_thread)}, - {NAME("max_background_threads"), CTL(max_background_threads)}, - {NAME("thread"), CHILD(named, thread)}, - 
{NAME("config"), CHILD(named, config)}, - {NAME("opt"), CHILD(named, opt)}, - {NAME("tcache"), CHILD(named, tcache)}, - {NAME("arena"), CHILD(indexed, arena)}, - {NAME("arenas"), CHILD(named, arenas)}, - {NAME("prof"), CHILD(named, prof)}, - {NAME("stats"), CHILD(named, stats)}, - {NAME("experimental"), CHILD(named, experimental)} -}; +static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, + {NAME("epoch"), CTL(epoch)}, + {NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("tcache"), CHILD(named, tcache)}, + {NAME("arena"), CHILD(indexed, arena)}, + {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)}, + {NAME("experimental"), CHILD(named, experimental)}}; static const ctl_named_node_t super_root_node[] = { - {NAME(""), CHILD(named, root)} -}; + {NAME(""), CHILD(named, root)}}; #undef NAME #undef CHILD @@ -1048,8 +940,7 @@ static const ctl_named_node_t super_root_node[] = { */ static void ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { - locked_inc_u64_unsynchronized(dst, - locked_read_u64_unsynchronized(src)); + locked_inc_u64_unsynchronized(dst, locked_read_u64_unsynchronized(src)); } static void @@ -1089,8 +980,8 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) { * more than one past the range of indices that have * initialized ctl data. 
*/ - assert(i < ctl_arenas->narenas || (!validate && i == - ctl_arenas->narenas)); + assert(i < ctl_arenas->narenas + || (!validate && i == ctl_arenas->narenas)); a = (unsigned)i + 2; } break; @@ -1114,12 +1005,12 @@ arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) { if (init && ret == NULL) { if (config_stats) { struct container_s { - ctl_arena_t ctl_arena; - ctl_arena_stats_t astats; + ctl_arena_t ctl_arena; + ctl_arena_stats_t astats; }; - struct container_s *cont = - (struct container_s *)base_alloc(tsd_tsdn(tsd), - b0get(), sizeof(struct container_s), QUANTUM); + struct container_s *cont = (struct container_s *) + base_alloc(tsd_tsdn(tsd), b0get(), + sizeof(struct container_s), QUANTUM); if (cont == NULL) { return NULL; } @@ -1177,8 +1068,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = &ctl_arena->astats->bstats[i].stats_data; - ctl_arena->astats->allocated_small += bstats->curregs * - sz_index2size(i); + ctl_arena->astats->allocated_small += bstats->curregs + * sz_index2size(i); ctl_arena->astats->nmalloc_small += bstats->nmalloc; ctl_arena->astats->ndalloc_small += bstats->ndalloc; ctl_arena->astats->nrequests_small += bstats->nrequests; @@ -1194,8 +1085,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { } static void -ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, - bool destroyed) { +ctl_arena_stats_sdmerge( + ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, bool destroyed) { unsigned i; if (!destroyed) { @@ -1216,52 +1107,59 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->astats.mapped += astats->astats.mapped; - sdstats->astats.pa_shard_stats.pac_stats.retained - += astats->astats.pa_shard_stats.pac_stats.retained; - sdstats->astats.pa_shard_stats.edata_avail - += astats->astats.pa_shard_stats.edata_avail; + 
sdstats->astats.pa_shard_stats.pac_stats.retained += + astats->astats.pa_shard_stats.pac_stats.retained; + sdstats->astats.pa_shard_stats.edata_avail += + astats->astats.pa_shard_stats.edata_avail; } - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.purged, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .npurge); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .nmadvise); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .purged); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .npurge); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .nmadvise); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.purged, + 
&astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .purged); -#define OP(mtx) malloc_mutex_prof_merge( \ - &(sdstats->astats.mutex_prof_data[ \ - arena_prof_mutex_##mtx]), \ - &(astats->astats.mutex_prof_data[ \ - arena_prof_mutex_##mtx])); -MUTEX_PROF_ARENA_MUTEXES +#define OP(mtx) \ + malloc_mutex_prof_merge( \ + &(sdstats->astats.mutex_prof_data[arena_prof_mutex_##mtx]), \ + &(astats->astats.mutex_prof_data[arena_prof_mutex_##mtx])); + MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { sdstats->astats.base += astats->astats.base; - sdstats->astats.metadata_edata += astats->astats - .metadata_edata; - sdstats->astats.metadata_rtree += astats->astats - .metadata_rtree; + sdstats->astats.metadata_edata += + astats->astats.metadata_edata; + sdstats->astats.metadata_rtree += + astats->astats.metadata_rtree; sdstats->astats.resident += astats->astats.resident; - sdstats->astats.metadata_thp += astats->astats.metadata_thp; + sdstats->astats.metadata_thp += + astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); } else { assert(atomic_load_zu( - &astats->astats.internal, ATOMIC_RELAXED) == 0); + &astats->astats.internal, ATOMIC_RELAXED) + == 0); } if (!destroyed) { @@ -1283,8 +1181,8 @@ MUTEX_PROF_ARENA_MUTEXES } sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sdstats->astats.nrequests_large - += astats->astats.nrequests_large; + sdstats->astats.nrequests_large += + astats->astats.nrequests_large; sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm, @@ -1322,14 +1220,12 @@ MUTEX_PROF_ARENA_MUTEXES assert(bstats->nonfull_slabs == 0); } - merged->batch_pops - += bstats->batch_pops; - merged->batch_failed_pushes - += bstats->batch_failed_pushes; - merged->batch_pushes - += bstats->batch_pushes; - merged->batch_pushed_elems - += 
bstats->batch_pushed_elems; + merged->batch_pops += bstats->batch_pops; + merged->batch_failed_pushes += + bstats->batch_failed_pushes; + merged->batch_pushes += bstats->batch_pushes; + merged->batch_pushed_elems += + bstats->batch_pushed_elems; malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -1355,14 +1251,14 @@ MUTEX_PROF_ARENA_MUTEXES for (i = 0; i < SC_NPSIZES; i++) { sdstats->estats[i].ndirty += astats->estats[i].ndirty; sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy; - sdstats->estats[i].nretained - += astats->estats[i].nretained; - sdstats->estats[i].dirty_bytes - += astats->estats[i].dirty_bytes; - sdstats->estats[i].muzzy_bytes - += astats->estats[i].muzzy_bytes; - sdstats->estats[i].retained_bytes - += astats->estats[i].retained_bytes; + sdstats->estats[i].nretained += + astats->estats[i].nretained; + sdstats->estats[i].dirty_bytes += + astats->estats[i].dirty_bytes; + sdstats->estats[i].muzzy_bytes += + astats->estats[i].muzzy_bytes; + sdstats->estats[i].retained_bytes += + astats->estats[i].retained_bytes; } /* Merge HPA stats. 
*/ @@ -1384,11 +1280,11 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, static unsigned ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { - unsigned arena_ind; + unsigned arena_ind; ctl_arena_t *ctl_arena; - if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) != - NULL) { + if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) + != NULL) { ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link); arena_ind = ctl_arena->arena_ind; } else { @@ -1415,8 +1311,8 @@ ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { static void ctl_background_thread_stats_read(tsdn_t *tsdn) { background_thread_stats_t *stats = &ctl_stats->background_thread; - if (!have_background_thread || - background_thread_stats_read(tsdn, stats)) { + if (!have_background_thread + || background_thread_stats_read(tsdn, stats)) { memset(stats, 0, sizeof(background_thread_stats_t)); nstime_init_zero(&stats->run_interval); } @@ -1452,39 +1348,39 @@ ctl_refresh(tsdn_t *tsdn) { for (unsigned i = 0; i < narenas; i++) { ctl_arena_t *ctl_arena = arenas_i(i); - bool initialized = (tarenas[i] != NULL); + bool initialized = (tarenas[i] != NULL); ctl_arena->initialized = initialized; if (initialized) { - ctl_arena_refresh(tsdn, tarenas[i], ctl_sarena, i, - false); + ctl_arena_refresh( + tsdn, tarenas[i], ctl_sarena, i, false); } } if (config_stats) { - ctl_stats->allocated = ctl_sarena->astats->allocated_small + - ctl_sarena->astats->astats.allocated_large; + ctl_stats->allocated = ctl_sarena->astats->allocated_small + + ctl_sarena->astats->astats.allocated_large; ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); - ctl_stats->metadata = ctl_sarena->astats->astats.base + - atomic_load_zu(&ctl_sarena->astats->astats.internal, - ATOMIC_RELAXED); - ctl_stats->metadata_edata = ctl_sarena->astats->astats - .metadata_edata; - ctl_stats->metadata_rtree = ctl_sarena->astats->astats - .metadata_rtree; + ctl_stats->metadata = 
ctl_sarena->astats->astats.base + + atomic_load_zu( + &ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_edata = + ctl_sarena->astats->astats.metadata_edata; + ctl_stats->metadata_rtree = + ctl_sarena->astats->astats.metadata_rtree; ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = ctl_sarena->astats->astats.mapped; - ctl_stats->retained = ctl_sarena->astats->astats - .pa_shard_stats.pac_stats.retained; + ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats + .pac_stats.retained; ctl_background_thread_stats_read(tsdn); -#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ - malloc_mutex_lock(tsdn, &mtx); \ - malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ - malloc_mutex_unlock(tsdn, &mtx); +#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); if (config_prof && opt_prof) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -1507,9 +1403,9 @@ ctl_refresh(tsdn_t *tsdn) { global_prof_mutex_background_thread, background_thread_lock); } else { - memset(&ctl_stats->mutex_prof_data[ - global_prof_mutex_background_thread], 0, - sizeof(mutex_prof_data_t)); + memset(&ctl_stats->mutex_prof_data + [global_prof_mutex_background_thread], + 0, sizeof(mutex_prof_data_t)); } /* We own ctl mutex already. */ malloc_mutex_prof_read(tsdn, @@ -1522,21 +1418,21 @@ ctl_refresh(tsdn_t *tsdn) { static bool ctl_init(tsd_t *tsd) { - bool ret; + bool ret; tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { ctl_arena_t *ctl_sarena, *ctl_darena; - unsigned i; + unsigned i; /* * Allocate demand-zeroed space for pointers to the full * range of supported arena indices. 
*/ if (ctl_arenas == NULL) { - ctl_arenas = (ctl_arenas_t *)base_alloc(tsdn, - b0get(), sizeof(ctl_arenas_t), QUANTUM); + ctl_arenas = (ctl_arenas_t *)base_alloc( + tsdn, b0get(), sizeof(ctl_arenas_t), QUANTUM); if (ctl_arenas == NULL) { ret = true; goto label_return; @@ -1544,8 +1440,8 @@ ctl_init(tsd_t *tsd) { } if (config_stats && ctl_stats == NULL) { - ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(), - sizeof(ctl_stats_t), QUANTUM); + ctl_stats = (ctl_stats_t *)base_alloc( + tsdn, b0get(), sizeof(ctl_stats_t), QUANTUM); if (ctl_stats == NULL) { ret = true; goto label_return; @@ -1557,15 +1453,17 @@ ctl_init(tsd_t *tsd) { * here rather than doing it lazily elsewhere, in order * to limit when OOM-caused errors can occur. */ - if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false, - true)) == NULL) { + if ((ctl_sarena = arenas_i_impl( + tsd, MALLCTL_ARENAS_ALL, false, true)) + == NULL) { ret = true; goto label_return; } ctl_sarena->initialized = true; - if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED, - false, true)) == NULL) { + if ((ctl_darena = arenas_i_impl( + tsd, MALLCTL_ARENAS_DESTROYED, false, true)) + == NULL) { ret = true; goto label_return; } @@ -1600,9 +1498,9 @@ static int ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, const char *name, const ctl_named_node_t **ending_nodep, size_t *mibp, size_t *depthp) { - int ret; - const char *elm, *tdot, *dot; - size_t elen, i, j; + int ret; + const char *elm, *tdot, *dot; + size_t elen, i, j; const ctl_named_node_t *node; elm = name; @@ -1624,8 +1522,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, for (j = 0; j < node->nchildren; j++) { const ctl_named_node_t *child = ctl_named_children(node, j); - if (strlen(child->name) == elen && - strncmp(elm, child->name, elen) == 0) { + if (strlen(child->name) == elen + && strncmp(elm, child->name, elen) == 0) { node = child; mibp[i] = j; break; @@ -1636,7 +1534,7 @@ ctl_lookup(tsdn_t *tsdn, const 
ctl_named_node_t *starting_node, goto label_return; } } else { - uintmax_t index; + uintmax_t index; const ctl_indexed_node_t *inode; /* Children are indexed. */ @@ -1674,8 +1572,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, /* Update elm. */ elm = &dot[1]; - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : - strchr(elm, '\0'); + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot + : strchr(elm, '\0'); elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); } if (ending_nodep != NULL) { @@ -1690,9 +1588,9 @@ label_return: int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - size_t depth; - size_t mib[CTL_MAX_DEPTH]; + int ret; + size_t depth; + size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1701,8 +1599,8 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, &node, mib, - &depth); + ret = ctl_lookup( + tsd_tsdn(tsd), super_root_node, name, &node, mib, &depth); if (ret != 0) { goto label_return; } @@ -1715,7 +1613,7 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } label_return: - return(ret); + return (ret); } int @@ -1727,10 +1625,10 @@ ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { goto label_return; } - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, NULL, mibp, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), super_root_node, name, NULL, mibp, miblenp); label_return: - return(ret); + return (ret); } static int @@ -1766,13 +1664,13 @@ ctl_lookupbymib(tsdn_t *tsdn, const ctl_named_node_t **ending_nodep, ret = 0; label_return: - return(ret); + return (ret); } int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ 
-1794,13 +1692,13 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } label_return: - return(ret); + return (ret); } int -ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp) { - int ret; +ctl_mibnametomib( + tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp) { + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1820,17 +1718,17 @@ ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, assert(miblenp != NULL); assert(*miblenp >= miblen); *miblenp -= miblen; - ret = ctl_lookup(tsd_tsdn(tsd), node, name, NULL, mib + miblen, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), node, name, NULL, mib + miblen, miblenp); *miblenp += miblen; label_return: - return(ret); + return (ret); } int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1853,29 +1751,29 @@ ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, /* * The same node supplies the starting node and stores the ending node. */ - ret = ctl_lookup(tsd_tsdn(tsd), node, name, &node, mib + miblen, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), node, name, &node, mib + miblen, miblenp); *miblenp += miblen; if (ret != 0) { goto label_return; } if (node != NULL && node->ctl) { - ret = node->ctl(tsd, mib, *miblenp, oldp, oldlenp, newp, - newlen); + ret = node->ctl( + tsd, mib, *miblenp, oldp, oldlenp, newp, newlen); } else { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; } label_return: - return(ret); + return (ret); } bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -1907,195 +1805,201 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { /******************************************************************************/ /* *_ctl() functions. */ -#define READONLY() do { \ - if (newp != NULL || newlen != 0) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define READONLY() \ + do { \ + if (newp != NULL || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) -#define WRITEONLY() do { \ - if (oldp != NULL || oldlenp != NULL) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define WRITEONLY() \ + do { \ + if (oldp != NULL || oldlenp != NULL) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Can read or write, but not both. */ -#define READ_XOR_WRITE() do { \ - if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \ - newlen != 0)) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define READ_XOR_WRITE() \ + do { \ + if ((oldp != NULL && oldlenp != NULL) \ + && (newp != NULL || newlen != 0)) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Can neither read nor write. */ -#define NEITHER_READ_NOR_WRITE() do { \ - if (oldp != NULL || oldlenp != NULL || newp != NULL || \ - newlen != 0) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define NEITHER_READ_NOR_WRITE() \ + do { \ + if (oldp != NULL || oldlenp != NULL || newp != NULL \ + || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Verify that the space provided is enough. 
*/ -#define VERIFY_READ(t) do { \ - if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ - if (oldlenp != NULL) { \ - *oldlenp = 0; \ - } \ - ret = EINVAL; \ - goto label_return; \ - } \ -} while (0) +#define VERIFY_READ(t) \ + do { \ + if (oldp == NULL || oldlenp == NULL \ + || *oldlenp != sizeof(t)) { \ + if (oldlenp != NULL) { \ + *oldlenp = 0; \ + } \ + ret = EINVAL; \ + goto label_return; \ + } \ + } while (0) -#define READ(v, t) do { \ - if (oldp != NULL && oldlenp != NULL) { \ - if (*oldlenp != sizeof(t)) { \ - size_t copylen = (sizeof(t) <= *oldlenp) \ - ? sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&(v), copylen); \ - *oldlenp = copylen; \ - ret = EINVAL; \ - goto label_return; \ - } \ - *(t *)oldp = (v); \ - } \ -} while (0) +#define READ(v, t) \ + do { \ + if (oldp != NULL && oldlenp != NULL) { \ + if (*oldlenp != sizeof(t)) { \ + size_t copylen = (sizeof(t) <= *oldlenp) \ + ? sizeof(t) \ + : *oldlenp; \ + memcpy(oldp, (void *)&(v), copylen); \ + *oldlenp = copylen; \ + ret = EINVAL; \ + goto label_return; \ + } \ + *(t *)oldp = (v); \ + } \ + } while (0) -#define WRITE(v, t) do { \ - if (newp != NULL) { \ - if (newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto label_return; \ - } \ - (v) = *(t *)newp; \ - } \ -} while (0) +#define WRITE(v, t) \ + do { \ + if (newp != NULL) { \ + if (newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ + } \ + } while (0) -#define ASSURED_WRITE(v, t) do { \ - if (newp == NULL || newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto label_return; \ - } \ - (v) = *(t *)newp; \ -} while (0) +#define ASSURED_WRITE(v, t) \ + do { \ + if (newp == NULL || newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ + } while (0) -#define MIB_UNSIGNED(v, i) do { \ - if (mib[i] > UINT_MAX) { \ - ret = EFAULT; \ - goto label_return; \ - } \ - v = (unsigned)mib[i]; \ -} while (0) +#define MIB_UNSIGNED(v, i) \ + do { \ + if (mib[i] > UINT_MAX) { \ 
+ ret = EFAULT; \ + goto label_return; \ + } \ + v = (unsigned)mib[i]; \ + } while (0) /* * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. */ -#define CTL_RO_CGEN(c, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return ret; \ -} +#define CTL_RO_CGEN(c, n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + if (!(c)) { \ + return ENOENT; \ + } \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + return ret; \ + } -#define CTL_RO_GEN(n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return ret; \ -} +#define CTL_RO_GEN(n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + return ret; \ + } /* * ctl_mtx is not acquired, under the assumption that no pertinent data will * mutate 
during the call. */ -#define CTL_RO_NL_CGEN(c, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_NL_CGEN(c, n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + if (!(c)) { \ + return ENOENT; \ + } \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } -#define CTL_RO_NL_GEN(n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_NL_GEN(n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } -#define CTL_RO_CONFIG_GEN(n, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = n; \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_CONFIG_GEN(n, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = n; \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } 
/******************************************************************************/ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; UNUSED uint64_t newval; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -2112,10 +2016,9 @@ label_return: } static int -background_thread_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { - int ret; +background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!have_background_thread) { @@ -2164,10 +2067,9 @@ label_return: } static int -max_background_threads_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t oldval; if (!have_background_thread) { @@ -2193,8 +2095,7 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib, ret = 0; goto label_return; } - if (newval > opt_max_background_threads || - newval == 0) { + if (newval > opt_max_background_threads || newval == 0) { ret = EINVAL; goto label_return; } @@ -2244,19 +2145,19 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) -CTL_RO_NL_GEN(opt_debug_double_free_max_scan, - opt_debug_double_free_max_scan, unsigned) +CTL_RO_NL_GEN( + opt_debug_double_free_max_scan, opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, 
opt_confirm_conf, bool) /* HPA options. */ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_hugification_threshold, - opt_hpa_opts.hugification_threshold, size_t) +CTL_RO_NL_GEN( + opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool) -CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, - uint64_t) +CTL_RO_NL_GEN( + opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) @@ -2271,19 +2172,19 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, - size_t) -CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, - size_t) +CTL_RO_NL_GEN( + opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, size_t) +CTL_RO_NL_GEN( + opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) -CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], - const char *) +CTL_RO_NL_GEN( + opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], - const char *) +CTL_RO_NL_GEN( + opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) CTL_RO_NL_GEN(opt_mutex_max_spin, 
opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) @@ -2302,65 +2203,66 @@ CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) -CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, - size_t) -CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, - size_t) +CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) +CTL_RO_NL_GEN( + opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) -CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, - unsigned) -CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, - unsigned) +CTL_RO_NL_GEN( + opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, unsigned) +CTL_RO_NL_GEN( + opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, unsigned) CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) -CTL_RO_NL_GEN(opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, - unsigned) -CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, - unsigned) +CTL_RO_NL_GEN( + opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, unsigned) +CTL_RO_NL_GEN( + opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, unsigned) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) -CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, 
opt_lg_extent_max_active_fit, - size_t) -CTL_RO_NL_GEN(opt_process_madvise_max_batch, opt_process_madvise_max_batch, - size_t) +CTL_RO_NL_GEN( + opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) +CTL_RO_NL_GEN( + opt_process_madvise_max_batch, opt_process_madvise_max_batch, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, - opt_prof_thread_active_init, bool) +CTL_RO_NL_CGEN( + config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, opt_experimental_lg_prof_threshold, size_t) +CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, + opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, - bool) +CTL_RO_NL_CGEN( + config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak_error, opt_prof_leak_error, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, - opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN( + config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, - bool) 
+CTL_RO_NL_CGEN( + config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) -CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, - opt_lg_san_uaf_align, ssize_t) +CTL_RO_NL_CGEN( + config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_GEN(opt_disable_large_size_classes, opt_disable_large_size_classes, bool) +CTL_RO_NL_GEN( + opt_disable_large_size_classes, opt_disable_large_size_classes, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, opt_malloc_conf_symlink, const char *) CTL_RO_NL_CGEN(opt_malloc_conf_env_var, opt_malloc_conf_env_var, opt_malloc_conf_env_var, const char *) -CTL_RO_NL_CGEN(je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, - const char *) +CTL_RO_NL_CGEN( + je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, const char *) CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, opt_malloc_conf_global_var_2_conf_harder, je_malloc_conf_2_conf_harder, const char *) @@ -2368,9 +2270,9 @@ CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, /******************************************************************************/ static int -thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; arena_t *oldarena; unsigned newind, oldind; @@ -2391,8 +2293,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } - if (have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)) { + if (have_percpu_arena + && PERCPU_ARENA_ENABLED(opt_percpu_arena)) { if (newind < percpu_arena_ind_limit(opt_percpu_arena)) { /* * If perCPU 
arena is enabled, thread_arena @@ -2429,9 +2331,8 @@ CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) static int thread_tcache_ncached_max_read_sizeclass_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t bin_size = 0; /* Read the bin size from newp. */ @@ -2455,8 +2356,7 @@ label_return: static int thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; WRITEONLY(); if (newp != NULL) { @@ -2471,8 +2371,8 @@ thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } /* Get the length of the setting string safely. */ - char *end = (char *)memchr(settings, '\0', - CTL_MULTI_SETTING_MAX_LEN); + char *end = (char *)memchr( + settings, '\0', CTL_MULTI_SETTING_MAX_LEN); if (end == NULL) { ret = EINVAL; goto label_return; @@ -2502,10 +2402,9 @@ CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) static int -thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; oldval = tcache_enabled_get(tsd); @@ -2524,10 +2423,9 @@ label_return: } static int -thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t oldval; /* 
pointer to tcache_t always exists even with tcache disabled. */ @@ -2547,7 +2445,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, new_tcache_max = TCACHE_MAXCLASS_LIMIT; } new_tcache_max = sz_s2u(new_tcache_max); - if(new_tcache_max != oldval) { + if (new_tcache_max != oldval) { thread_tcache_max_set(tsd, new_tcache_max); } } @@ -2558,9 +2456,8 @@ label_return: } static int -thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!tcache_available(tsd)) { @@ -2578,9 +2475,8 @@ label_return: } static int -thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_stats) { return ENOENT; @@ -2595,9 +2491,8 @@ label_return: } static int -thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_stats) { return ENOENT; @@ -2610,9 +2505,8 @@ label_return: } static int -thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_prof || !opt_prof) { @@ -2642,10 +2536,9 @@ label_return: } static int -thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t 
miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -2675,9 +2568,8 @@ label_return: } static int -thread_idle_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_idle_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; NEITHER_READ_NOR_WRITE(); @@ -2710,9 +2602,9 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; READONLY(); @@ -2729,9 +2621,9 @@ label_return: } static int -tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; WRITEONLY(); @@ -2744,9 +2636,9 @@ label_return: } static int -tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; WRITEONLY(); @@ -2763,10 +2655,10 @@ label_return: static int arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - tsdn_t *tsdn = tsd_tsdn(tsd); + int ret; + tsdn_t *tsdn = tsd_tsdn(tsd); unsigned arena_ind; - bool initialized; + bool initialized; READONLY(); MIB_UNSIGNED(arena_ind, 1); @@ -2808,8 +2700,8 @@ arena_i_decay(tsdn_t *tsdn, 
unsigned arena_ind, bool all) { for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) { - arena_decay(tsdn, tarenas[i], false, - all); + arena_decay( + tsdn, tarenas[i], false, all); } } } else { @@ -2832,7 +2724,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { static int arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; NEITHER_READ_NOR_WRITE(); @@ -2847,7 +2739,7 @@ label_return: static int arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; NEITHER_READ_NOR_WRITE(); @@ -2913,12 +2805,12 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; - ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, &arena_ind, &arena); + ret = arena_i_reset_destroy_helper( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { return ret; } @@ -2933,21 +2825,21 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, static int arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned arena_ind; - arena_t *arena; + int ret; + unsigned arena_ind; + arena_t *arena; ctl_arena_t *ctl_darena, *ctl_arena; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, &arena_ind, &arena); + ret = arena_i_reset_destroy_helper( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { goto label_return; } - if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena, - true) 
!= 0) { + if (arena_nthreads_get(arena, false) != 0 + || arena_nthreads_get(arena, true) != 0) { ret = EFAULT; goto label_return; } @@ -2978,16 +2870,16 @@ label_return: static int arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const char *dss = NULL; - unsigned arena_ind; - dss_prec_t dss_prec = dss_prec_limit; + unsigned arena_ind; + dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); MIB_UNSIGNED(arena_ind, 1); if (dss != NULL) { - int i; + int i; bool match = false; for (i = 0; i < dss_prec_limit; i++) { @@ -3009,18 +2901,19 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * 6.0.0. */ dss_prec_t dss_prec_old; - if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == - ctl_arenas->narenas) { - if (dss_prec != dss_prec_limit && - extent_dss_prec_set(dss_prec)) { + if (arena_ind == MALLCTL_ARENAS_ALL + || arena_ind == ctl_arenas->narenas) { + if (dss_prec != dss_prec_limit + && extent_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } dss_prec_old = extent_dss_prec_get(); } else { arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); - if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(arena, dss_prec))) { + if (arena == NULL + || (dss_prec != dss_prec_limit + && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; goto label_return; } @@ -3071,7 +2964,7 @@ label_return: static int arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; @@ -3093,8 +2986,8 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } - if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, - *(ssize_t *)newp)) { + if (arena_decay_ms_set( + tsd_tsdn(tsd), arena, state, *(ssize_t *)newp)) { ret = 
EFAULT; goto label_return; } @@ -3108,21 +3001,21 @@ label_return: static int arena_i_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, true); + return arena_i_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, true); } static int arena_i_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, false); + return arena_i_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, false); } static int arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; @@ -3147,8 +3040,8 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena_config_t config = arena_config_default; config.extent_hooks = new_extent_hooks; - arena = arena_init(tsd_tsdn(tsd), arena_ind, - &config); + arena = arena_init( + tsd_tsdn(tsd), arena_ind, &config); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -3159,13 +3052,12 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = arena_set_extent_hooks(tsd, - arena, new_extent_hooks); + old_extent_hooks = arena_set_extent_hooks( + tsd, arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = - ehooks_get_extent_hooks_ptr( - arena_get_ehooks(arena)); + old_extent_hooks = ehooks_get_extent_hooks_ptr( + arena_get_ehooks(arena)); READ(old_extent_hooks, extent_hooks_t *); } } @@ -3180,10 +3072,9 @@ label_return: } static int -arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, - 
size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned arena_ind; arena_t *arena; @@ -3194,14 +3085,14 @@ arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + if (arena_ind < narenas_total_get() + && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { size_t old_limit, new_limit; if (newp != NULL) { WRITE(new_limit, size_t); } - bool err = arena_retain_grow_limit_get_set(tsd, arena, - &old_limit, newp != NULL ? &new_limit : NULL); + bool err = arena_retain_grow_limit_get_set( + tsd, arena, &old_limit, newp != NULL ? &new_limit : NULL); if (!err) { READ(old_limit, size_t); ret = 0; @@ -3223,16 +3114,16 @@ label_return: * ARENA_NAME_LEN or the length of the name when it was set. 
*/ static int -arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned arena_ind; +arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; char *name JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind >= - ctl_arenas->narenas) { + if (arena_ind == MALLCTL_ARENAS_ALL + || arena_ind >= ctl_arenas->narenas) { ret = EINVAL; goto label_return; } @@ -3272,8 +3163,7 @@ label_return: } static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -3298,9 +3188,9 @@ label_return: /******************************************************************************/ static int -arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned narenas; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3315,14 +3205,13 @@ label_return: } static int -arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen, bool dirty) { +arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = (dirty ? arena_dirty_decay_ms_default_get() : - arena_muzzy_decay_ms_default_get()); + size_t oldval = (dirty ? 
arena_dirty_decay_ms_default_get() + : arena_muzzy_decay_ms_default_get()); READ(oldval, ssize_t); } if (newp != NULL) { @@ -3330,8 +3219,9 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) - : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { + if (dirty + ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -3345,15 +3235,15 @@ label_return: static int arenas_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, true); + return arenas_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, true); } static int arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, false); + return arenas_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, false); } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) @@ -3367,8 +3257,7 @@ CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > SC_NBINS) { return NULL; } @@ -3377,10 +3266,10 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_lextent_i_size, - sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) + sz_index2size_unsafe(SC_NBINS + 
(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +arenas_lextent_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > SC_NSIZES - SC_NBINS) { return NULL; } @@ -3388,9 +3277,9 @@ arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, } static int -arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3411,10 +3300,9 @@ label_return: } static int -experimental_arenas_create_ext_ctl(tsd_t *tsd, - const size_t *mib, size_t miblen, +experimental_arenas_create_ext_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3435,22 +3323,21 @@ label_return: } static int -arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; - unsigned arena_ind; - void *ptr; +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; emap_full_alloc_ctx_t alloc_ctx; - bool ptr_not_present; - arena_t *arena; + bool ptr_not_present; + arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + ptr_not_present = emap_full_alloc_ctx_try_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (ptr_not_present || alloc_ctx.edata == NULL) { goto label_return; } @@ -3472,10 +3359,9 @@ label_return: 
/******************************************************************************/ static int -prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3491,11 +3377,11 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(tsd_tsdn(tsd), - *(bool *)newp); + oldval = prof_thread_active_init_set( + tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) : - false; + oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) + : false; } READ(oldval, bool); @@ -3505,9 +3391,9 @@ label_return: } static int -prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3543,9 +3429,9 @@ label_return: } static int -prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; const char *filename = NULL; if (!config_prof || !opt_prof) { @@ -3566,9 +3452,9 @@ label_return: } static int -prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3596,9 +3482,9 @@ label_return: } static int 
-prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; const char *prefix = NULL; if (!config_prof || !opt_prof) { @@ -3616,9 +3502,9 @@ label_return: } static int -prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t lg_sample = lg_prof_sample; if (!config_prof || !opt_prof) { @@ -3689,8 +3575,7 @@ experimental_hooks_prof_backtrace_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_backtrace_hook_t old_hook = - prof_backtrace_hook_get(); + prof_backtrace_hook_t old_hook = prof_backtrace_hook_get(); READ(old_hook, prof_backtrace_hook_t); } if (newp != NULL) { @@ -3712,8 +3597,8 @@ label_return: } static int -experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (oldp == NULL && newp == NULL) { @@ -3721,8 +3606,7 @@ experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_dump_hook_t old_hook = - prof_dump_hook_get(); + prof_dump_hook_t old_hook = prof_dump_hook_get(); READ(old_hook, prof_dump_hook_t); } if (newp != NULL) { @@ -3740,8 +3624,8 @@ label_return: } static int -experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, 
void *newp, size_t newlen) { int ret; if (oldp == NULL && newp == NULL) { @@ -3749,8 +3633,7 @@ experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_sample_hook_t old_hook = - prof_sample_hook_get(); + prof_sample_hook_t old_hook = prof_sample_hook_get(); READ(old_hook, prof_sample_hook_t); } if (newp != NULL) { @@ -3777,8 +3660,7 @@ experimental_hooks_prof_sample_free_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_sample_free_hook_t old_hook = - prof_sample_free_hook_get(); + prof_sample_free_hook_t old_hook = prof_sample_free_hook_get(); READ(old_hook, prof_sample_free_hook_t); } if (newp != NULL) { @@ -3795,7 +3677,6 @@ label_return: return ret; } - static int experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -3806,8 +3687,7 @@ experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_threshold_hook_t old_hook = - prof_threshold_hook_get(); + prof_threshold_hook_t old_hook = prof_threshold_hook_get(); READ(old_hook, prof_threshold_hook_t); } if (newp != NULL) { @@ -3822,7 +3702,7 @@ label_return: static int experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (newp == NULL) { @@ -3864,10 +3744,10 @@ label_return: CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) -CTL_RO_CGEN(config_stats, stats_metadata_edata, ctl_stats->metadata_edata, - size_t) -CTL_RO_CGEN(config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, - size_t) +CTL_RO_CGEN( + config_stats, stats_metadata_edata, 
ctl_stats->metadata_edata, size_t) +CTL_RO_CGEN( + config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, size_t) CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) @@ -3884,10 +3764,10 @@ CTL_RO_CGEN(config_stats, stats_zero_reallocs, atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) -CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, - ssize_t) -CTL_RO_GEN(stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, - ssize_t) +CTL_RO_GEN( + stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) +CTL_RO_GEN( + stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_uptime, nstime_ns(&arenas_i(mib[2])->astats->astats.uptime), uint64_t) @@ -3903,33 +3783,38 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, 
locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - arenas_i(mib[2])->astats->astats.base, - size_t) + arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) @@ -3944,12 +3829,12 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_stashed_bytes, arenas_i(mib[2])->astats->astats.tcache_stashed_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - arenas_i(mib[2])->astats->astats.resident, - size_t) + arenas_i(mib[2])->astats->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, - ATOMIC_RELAXED), size_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, + ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, arenas_i(mib[2])->astats->secstats.bytes, size_t) @@ -3984,55 +3869,55 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t) /* Lock profiling related APIs below. 
*/ -#define RO_MUTEX_CTL_GEN(n, l) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \ - l.n_lock_ops, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \ - l.n_wait_times, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \ - l.n_spin_acquired, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ - l.n_owner_switches, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ - nstime_ns(&l.tot_wait_time), uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ - nstime_ns(&l.max_wait_time), uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ - l.max_n_thds, uint32_t) +#define RO_MUTEX_CTL_GEN(n, l) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_ops, l.n_lock_ops, uint64_t) \ + CTL_RO_CGEN( \ + config_stats, stats_##n##_num_wait, l.n_wait_times, uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, l.n_spin_acquired, \ + uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ + l.n_owner_switches, uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ + nstime_ns(&l.tot_wait_time), uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ + nstime_ns(&l.max_wait_time), uint64_t) \ + CTL_RO_CGEN( \ + config_stats, stats_##n##_max_num_thds, l.max_n_thds, uint32_t) /* Global mutexes. 
*/ -#define OP(mtx) \ - RO_MUTEX_CTL_GEN(mutexes_##mtx, \ - ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) +#define OP(mtx) \ + RO_MUTEX_CTL_GEN(mutexes_##mtx, \ + ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) MUTEX_PROF_GLOBAL_MUTEXES #undef OP /* Per arena mutexes */ -#define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ - arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) +#define OP(mtx) \ + RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ + arenas_i(mib[2]) \ + ->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) MUTEX_PROF_ARENA_MUTEXES #undef OP /* tcache bin mutex */ -RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, - arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) +RO_MUTEX_CTL_GEN( + arenas_i_bins_j_mutex, arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) #undef RO_MUTEX_CTL_GEN /* Resets all mutex stats, including global, arena and bin mutexes. */ static int -stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { +stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } tsdn_t *tsdn = tsd_tsdn(tsd); -#define MUTEX_PROF_RESET(mtx) \ - malloc_mutex_lock(tsdn, &mtx); \ - malloc_mutex_prof_data_reset(tsdn, &mtx); \ - malloc_mutex_unlock(tsdn, &mtx); +#define MUTEX_PROF_RESET(mtx) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_mutex_prof_data_reset(tsdn, &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); /* Global mutexes: ctl and prof. 
*/ MUTEX_PROF_RESET(ctl_mtx); @@ -4100,15 +3985,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, + uint64_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_bins_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > SC_NBINS) { return NULL; } @@ -4117,19 +4004,22 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, 
size_t) static const ctl_named_node_t * -stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_lextents_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > SC_NSIZES - SC_NBINS) { return NULL; } @@ -4137,21 +4027,21 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, - arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, - arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, - arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); static const ctl_named_node_t * -stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_extents_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j >= SC_NPSIZES) { return NULL; } @@ -4182,7 +4072,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, - 
arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, + uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, @@ -4194,66 +4085,92 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); /* Full, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, + size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, + size_t); /* Empty, nonhuge */ -CTL_RO_CGEN(config_stats, 
stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, + size_t); /* Empty, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, + size_t); /* Nonfull, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].npageslabs, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .npageslabs, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, +CTL_RO_CGEN(config_stats, + 
stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ndirty, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .ndirty, size_t); /* Nonfull, huge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].npageslabs, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ndirty, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .ndirty, size_t); static const ctl_named_node_t * -stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_hpa_shard_nonfull_slabs_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j >= PSSET_NPSIZES) { return NULL; } @@ -4271,8 +4188,7 @@ ctl_arenas_i_verify(size_t i) { } static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; 
malloc_mutex_lock(tsdn, &ctl_mtx); @@ -4291,7 +4207,7 @@ static int experimental_hooks_install_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - if (oldp == NULL || oldlenp == NULL|| newp == NULL) { + if (oldp == NULL || oldlenp == NULL || newp == NULL) { ret = EINVAL; goto label_return; } @@ -4426,8 +4342,8 @@ label_return: * motivation from C++. */ static int -experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; assert(sizeof(inspect_extent_util_stats_verbose_t) @@ -4442,8 +4358,8 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, void *ptr = NULL; WRITE(ptr, void *); - inspect_extent_util_stats_verbose_t *util_stats - = (inspect_extent_util_stats_verbose_t *)oldp; + inspect_extent_util_stats_verbose_t *util_stats = + (inspect_extent_util_stats_verbose_t *)oldp; inspect_extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, &util_stats->nfree, &util_stats->nregs, &util_stats->size, &util_stats->bin_nfree, &util_stats->bin_nregs, @@ -4565,7 +4481,7 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } - void **ptrs = (void **)newp; + void **ptrs = (void **)newp; inspect_extent_util_stats_t *util_stats = (inspect_extent_util_stats_t *)oldp; size_t i; @@ -4581,8 +4497,8 @@ label_return: } static const ctl_named_node_t * -experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +experimental_arenas_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -4597,8 +4513,8 @@ label_return: } static int -experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, 
size_t *oldlenp, void *newp, size_t newlen) { +experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } @@ -4608,16 +4524,16 @@ experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, unsigned arena_ind; arena_t *arena; - int ret; - size_t *pactivep; + int ret; + size_t *pactivep; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); MIB_UNSIGNED(arena_ind, 2); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { -#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ - defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) + if (arena_ind < narenas_total_get() + && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || defined(JEMALLOC_GCC_SYNC_ATOMICS) \ + || defined(_MSC_VER) /* Expose the underlying counter for fast read. */ pactivep = (size_t *)&(arena->pa_shard.nactive.repr); READ(pactivep, size_t *); @@ -4669,7 +4585,7 @@ label_return: typedef struct write_cb_packet_s write_cb_packet_t; struct write_cb_packet_s { write_cb_t *write_cb; - void *cbopaque; + void *cbopaque; }; static int @@ -4688,8 +4604,8 @@ experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, write_cb_packet_t write_cb_packet; ASSURED_WRITE(write_cb_packet, write_cb_packet_t); - prof_recent_alloc_dump(tsd, write_cb_packet.write_cb, - write_cb_packet.cbopaque); + prof_recent_alloc_dump( + tsd, write_cb_packet.write_cb, write_cb_packet.cbopaque); ret = 0; @@ -4702,12 +4618,12 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static int -experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t 
newlen) { int ret; VERIFY_READ(size_t); @@ -4728,8 +4644,8 @@ label_return: static int prof_stats_bins_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned binind; + int ret; + unsigned binind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4754,8 +4670,8 @@ label_return: static int prof_stats_bins_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned binind; + int ret; + unsigned binind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4778,8 +4694,8 @@ label_return: } static const ctl_named_node_t * -prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +prof_stats_bins_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (!(config_prof && opt_prof && opt_prof_stats)) { return NULL; } @@ -4792,8 +4708,8 @@ prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, static int prof_stats_lextents_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned lextent_ind; + int ret; + unsigned lextent_ind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4818,8 +4734,8 @@ label_return: static int prof_stats_lextents_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned lextent_ind; + int ret; + unsigned lextent_ind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4842,8 +4758,8 @@ label_return: } static const ctl_named_node_t * -prof_stats_lextents_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +prof_stats_lextents_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (!(config_prof && opt_prof && opt_prof_stats)) { return NULL; } 
diff --git a/src/decay.c b/src/decay.c index f75696dd..7bbce2a6 100644 --- a/src/decay.c +++ b/src/decay.c @@ -4,9 +4,8 @@ #include "jemalloc/internal/decay.h" static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP +#define STEP(step, h, x, y) h, + SMOOTHSTEP #undef STEP }; @@ -21,8 +20,9 @@ decay_deadline_init(decay_t *decay) { if (decay_ms_read(decay) > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range_u64(&decay->jitter_state, - nstime_ns(&decay->interval))); + nstime_init(&jitter, + prng_range_u64( + &decay->jitter_state, nstime_ns(&decay->interval))); nstime_add(&decay->deadline, &jitter); } } @@ -31,8 +31,8 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); if (decay_ms > 0) { - nstime_init(&decay->interval, (uint64_t)decay_ms * - KQU(1000000)); + nstime_init( + &decay->interval, (uint64_t)decay_ms * KQU(1000000)); nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } @@ -52,7 +52,7 @@ decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { decay->ceil_npages = 0; } if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } decay->purging = false; @@ -65,8 +65,8 @@ decay_ms_valid(ssize_t decay_ms) { if (decay_ms < -1) { return false; } - if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * - KQU(1000)) { + if (decay_ms == -1 + || (uint64_t)decay_ms <= NSTIME_SEC_MAX * KQU(1000)) { return true; } return false; @@ -74,8 +74,8 @@ decay_ms_valid(ssize_t decay_ms) { static void decay_maybe_update_time(decay_t *decay, nstime_t *new_time) { - if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, - new_time) > 0)) { + if (unlikely(!nstime_monotonic() + && nstime_compare(&decay->epoch, new_time) > 0)) { /* * Time went backwards. 
Move the epoch back in time and * generate a new deadline, with the expectation that time @@ -115,11 +115,11 @@ decay_backlog_npages_limit(const decay_t *decay) { * placed as the newest record. */ static void -decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, - size_t current_npages) { +decay_backlog_update( + decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); + memset(decay->backlog, 0, + (SMOOTHSTEP_NSTEPS - 1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; @@ -128,14 +128,15 @@ decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, memmove(decay->backlog, &decay->backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&decay->backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + memset(&decay->backlog[SMOOTHSTEP_NSTEPS - nadvance_z], + 0, (nadvance_z - 1) * sizeof(size_t)); } } - size_t npages_delta = (current_npages > decay->nunpurged) ? - current_npages - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; + size_t npages_delta = (current_npages > decay->nunpurged) + ? 
current_npages - decay->nunpurged + : 0; + decay->backlog[SMOOTHSTEP_NSTEPS - 1] = npages_delta; if (config_debug) { if (current_npages > decay->ceil_npages) { @@ -165,18 +166,17 @@ decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { npages_purge = npages_new; } else { uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; - assert(h_steps_max >= - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npages_purge = npages_new * (h_steps_max - - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + assert(h_steps_max >= h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge = npages_new + * (h_steps_max - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); npages_purge >>= SMOOTHSTEP_BFP; } return npages_purge; } bool -decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, - size_t npages_current) { +decay_maybe_advance_epoch( + decay_t *decay, nstime_t *new_time, size_t npages_current) { /* Handle possible non-monotonicity of time. */ decay_maybe_update_time(decay, new_time); @@ -202,8 +202,9 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, decay_backlog_update(decay, nadvance_u64, npages_current); decay->npages_limit = decay_backlog_npages_limit(decay); - decay->nunpurged = (decay->npages_limit > npages_current) ? - decay->npages_limit : npages_current; + decay->nunpurged = (decay->npages_limit > npages_current) + ? 
decay->npages_limit + : npages_current; return true; } @@ -226,21 +227,21 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, */ static inline size_t decay_npurge_after_interval(decay_t *decay, size_t interval) { - size_t i; + size_t i; uint64_t sum = 0; for (i = 0; i < interval; i++) { sum += decay->backlog[i] * h_steps[i]; } for (; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * - (h_steps[i] - h_steps[i - interval]); + sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); } return (size_t)(sum >> SMOOTHSTEP_BFP); } -uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, - uint64_t npages_threshold) { +uint64_t +decay_ns_until_purge( + decay_t *decay, size_t npages_current, uint64_t npages_threshold) { if (!decay_gradually(decay)) { return DECAY_UNBOUNDED_TIME_TO_PURGE; } @@ -278,7 +279,7 @@ uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, } unsigned n_search = 0; - size_t target, npurge; + size_t target, npurge; while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) { target = (lb + ub) / 2; npurge = decay_npurge_after_interval(decay, target); diff --git a/src/ecache.c b/src/ecache.c index a242227d..20fcee9e 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -7,7 +7,7 @@ bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } ecache->state = state; diff --git a/src/edata.c b/src/edata.c index 82b6f565..d71d1679 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,5 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail, edata_t, avail_link, - edata_esnead_comp) -ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) +ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) + ph_gen(, edata_heap, 
edata_t, heap_link, edata_snad_comp) diff --git a/src/edata_cache.c b/src/edata_cache.c index 6bc1848c..3ac8273a 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -11,7 +11,7 @@ edata_cache_init(edata_cache_t *edata_cache, base_t *base) { */ atomic_store_zu(&edata_cache->count, 0, ATOMIC_RELAXED); if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", - WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { return true; } edata_cache->base = base; @@ -63,8 +63,7 @@ edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) { } static void -edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, - edata_cache_fast_t *ecs) { +edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, edata_cache_fast_t *ecs) { edata_t *edata; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { @@ -80,8 +79,8 @@ edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, edata_t * edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_EDATA_CACHE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { assert(edata_list_inactive_first(&ecs->list) == NULL); @@ -118,7 +117,7 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { * flush and disable pathways. 
*/ edata_t *edata; - size_t nflushed = 0; + size_t nflushed = 0; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) { edata_list_inactive_remove(&ecs->list, edata); @@ -131,8 +130,8 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_EDATA_CACHE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { assert(edata_list_inactive_first(&ecs->list) == NULL); diff --git a/src/ehooks.c b/src/ehooks.c index 89e30409..d7abb960 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -27,9 +27,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, assert(alignment != 0); /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { + if (have_dss && dss_prec == dss_prec_primary + && (ret = extent_alloc_dss( + tsdn, arena, new_addr, size, alignment, zero, commit)) + != NULL) { return ret; } /* mmap. */ @@ -38,9 +39,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return ret; } /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { + if (have_dss && dss_prec == dss_prec_secondary + && (ret = extent_alloc_dss( + tsdn, arena, new_addr, size, alignment, zero, commit)) + != NULL) { return ret; } @@ -54,10 +56,11 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, arena_t *arena = arena_get(tsdn, arena_ind, false); /* NULL arena indicates arena_create. */ assert(arena != NULL || alignment == BASE_BLOCK_MIN_ALIGN); - dss_prec_t dss = (arena == NULL) ? 
dss_prec_disabled : - (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, - zero, commit, dss); + dss_prec_t dss = (arena == NULL) + ? dss_prec_disabled + : (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); + void *ret = extent_alloc_core( + tsdn, arena, new_addr, size, alignment, zero, commit, dss); if (have_madvise_huge && ret) { pages_set_thp_state(ret, size); } @@ -100,8 +103,8 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { - return pages_commit((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_commit( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -112,8 +115,8 @@ ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { - return pages_decommit((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_decommit( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -125,8 +128,8 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { - return pages_purge_lazy((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_purge_lazy( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -143,8 +146,8 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { - return pages_purge_forced((void *)((byte_t *)addr + - (uintptr_t)offset), length); + return pages_purge_forced( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -201,11 +204,11 @@ ehooks_default_merge_impl(tsdn_t 
*tsdn, void *addr_a, void *addr_b) { return true; } if (config_debug) { - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, - addr_a); - bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, - addr_b); + edata_t *a = emap_edata_lookup( + tsdn, &arena_emap_global, addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = emap_edata_lookup( + tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); emap_assert_mapped(tsdn, &arena_emap_global, a); emap_assert_mapped(tsdn, &arena_emap_global, b); @@ -254,22 +257,17 @@ ehooks_default_unguard_impl(void *guard1, void *guard2) { pages_unmark_guards(guard1, guard2); } -const extent_hooks_t ehooks_default_extent_hooks = { - ehooks_default_alloc, - ehooks_default_dalloc, - ehooks_default_destroy, - ehooks_default_commit, - ehooks_default_decommit, +const extent_hooks_t ehooks_default_extent_hooks = {ehooks_default_alloc, + ehooks_default_dalloc, ehooks_default_destroy, ehooks_default_commit, + ehooks_default_decommit, #ifdef PAGES_CAN_PURGE_LAZY - ehooks_default_purge_lazy, + ehooks_default_purge_lazy, #else - NULL, + NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - ehooks_default_purge_forced, + ehooks_default_purge_forced, #else - NULL, + NULL, #endif - ehooks_default_split, - ehooks_default_merge -}; + ehooks_default_split, ehooks_default_merge}; diff --git a/src/emap.c b/src/emap.c index f7d5c25a..54bfabab 100644 --- a/src/emap.c +++ b/src/emap.c @@ -16,10 +16,10 @@ emap_init(emap_t *emap, base_t *base, bool zeroed) { } void -emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); +emap_update_edata_state( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); edata_state_set(edata, state); @@ -28,10 +28,11 @@ 
emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, /* init_missing */ false); assert(elm1 != NULL); - rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL : - rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_last_get(edata), /* dependent */ true, - /* init_missing */ false); + rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE + ? NULL + : rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), /* dependent */ true, + /* init_missing */ false); rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state); @@ -42,17 +43,17 @@ static inline edata_t * emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward, bool expanding) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); assert(!edata_guarded_get(edata)); assert(!expanding || forward); assert(!edata_state_in_transition(expected_state)); - assert(expected_state == extent_state_dirty || - expected_state == extent_state_muzzy || - expected_state == extent_state_retained); + assert(expected_state == extent_state_dirty + || expected_state == extent_state_muzzy + || expected_state == extent_state_retained); - void *neighbor_addr = forward ? edata_past_get(edata) : - edata_before_get(edata); + void *neighbor_addr = forward ? edata_past_get(edata) + : edata_before_get(edata); /* * This is subtle; the rtree code asserts that its input pointer is * non-NULL, and this is a useful thing to check. 
But it's possible @@ -73,10 +74,10 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, return NULL; } - rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ false); + rtree_contents_t neighbor_contents = rtree_leaf_elm_read( + tsdn, &emap->rtree, elm, /* dependent */ false); if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, - expected_state, forward, expanding)) { + expected_state, forward, expanding)) { return NULL; } @@ -109,8 +110,8 @@ emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, } void -emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state) { +emap_release_edata( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state) { assert(emap_edata_in_transition(tsdn, emap, edata)); assert(emap_edata_is_acquired(tsdn, emap, edata)); @@ -145,8 +146,8 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; - contents.metadata.is_head = (edata == NULL) ? false : - edata_is_head_get(edata); + contents.metadata.is_head = (edata == NULL) ? false + : edata_is_head_get(edata); contents.metadata.state = (edata == NULL) ? 
0 : edata_state_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { @@ -155,29 +156,33 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, } bool -emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind, bool slab) { +emap_register_boundary( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { assert(edata_state_get(edata) == extent_state_active); EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; - bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, - false, true, &elm_a, &elm_b); + bool err = emap_rtree_leaf_elms_lookup( + tsdn, emap, rtree_ctx, edata, false, true, &elm_a, &elm_b); if (err) { return true; } assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, - /* dependent */ false).edata == NULL); + /* dependent */ false) + .edata + == NULL); assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, - /* dependent */ false).edata == NULL); + /* dependent */ false) + .edata + == NULL); emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } /* Invoked *after* emap_register_boundary. 
*/ void -emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind) { +emap_register_interior( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind) { EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); @@ -226,10 +231,10 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; - emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, - true, false, &elm_a, &elm_b); - emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, - false); + emap_rtree_leaf_elms_lookup( + tsdn, emap, rtree_ctx, edata, true, false, &elm_a, &elm_b); + emap_rtree_write_acquired( + tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, false); } void @@ -245,8 +250,8 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } void -emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab) { +emap_remap( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; if (szind != SC_NSIZES) { @@ -274,8 +279,8 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, if (slab && edata_size_get(edata) > PAGE) { uintptr_t key = (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE; - rtree_write(tsdn, &emap->rtree, rtree_ctx, key, - contents); + rtree_write( + tsdn, &emap->rtree, rtree_ctx, key, contents); } } } @@ -344,29 +349,29 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.metadata.state = (extent_state_t)0; if (prepare->lead_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->lead_elm_b, clear_contents); + rtree_leaf_elm_write( + tsdn, &emap->rtree, prepare->lead_elm_b, clear_contents); } rtree_leaf_elm_t *merged_b; if (prepare->trail_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->trail_elm_a, clear_contents); + rtree_leaf_elm_write( + tsdn, &emap->rtree, prepare->trail_elm_a, clear_contents); merged_b = 
prepare->trail_elm_b; } else { merged_b = prepare->trail_elm_a; } - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, - lead, SC_NSIZES, false); + emap_rtree_write_acquired( + tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)); assert(contents.edata == edata); assert(contents.metadata.is_head == edata_is_head_get(edata)); assert(contents.metadata.state == edata_state_get(edata)); @@ -375,12 +380,12 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { emap_full_alloc_ctx_t context1 = {0}; - emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata), - &context1); + emap_full_alloc_ctx_try_lookup( + tsdn, emap, edata_base_get(edata), &context1); assert(context1.edata == NULL); emap_full_alloc_ctx_t context2 = {0}; - emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata), - &context2); + emap_full_alloc_ctx_try_lookup( + tsdn, emap, edata_last_get(edata), &context2); assert(context2.edata == NULL); } diff --git a/src/eset.c b/src/eset.c index b4666e2c..4a427d78 100644 --- a/src/eset.c +++ b/src/eset.c @@ -48,32 +48,32 @@ eset_nbytes_get(eset_t *eset, pszind_t pind) { static void eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, - ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1, - ATOMIC_RELAXED); + size_t cur = atomic_load_zu( + &eset->bin_stats[pind].nextents, ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nextents, cur + 1, ATOMIC_RELAXED); cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, 
ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nbytes, cur + sz, - ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nbytes, cur + sz, ATOMIC_RELAXED); } static void eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, - ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1, - ATOMIC_RELAXED); + size_t cur = atomic_load_zu( + &eset->bin_stats[pind].nextents, ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nextents, cur - 1, ATOMIC_RELAXED); cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz, - ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nbytes, cur - sz, ATOMIC_RELAXED); } void eset_insert(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); - size_t size = edata_size_get(edata); - size_t psz = sz_psz_quantize_floor(size); + size_t size = edata_size_get(edata); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); @@ -86,8 +86,9 @@ eset_insert(eset_t *eset, edata_t *edata) { * There's already a min element; update the summary if we're * about to insert a lower one. */ - if (edata_cmp_summary_comp(edata_cmp_summary, - eset->bins[pind].heap_min) < 0) { + if (edata_cmp_summary_comp( + edata_cmp_summary, eset->bins[pind].heap_min) + < 0) { eset->bins[pind].heap_min = edata_cmp_summary; } } @@ -104,19 +105,18 @@ eset_insert(eset_t *eset, edata_t *edata) { * don't need an atomic fetch-add; we can get by with a load followed by * a store. 
*/ - size_t cur_eset_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - atomic_store_zu(&eset->npages, cur_eset_npages + npages, - ATOMIC_RELAXED); + size_t cur_eset_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu( + &eset->npages, cur_eset_npages + npages, ATOMIC_RELAXED); } void eset_remove(eset_t *eset, edata_t *edata) { - assert(edata_state_get(edata) == eset->state || - edata_state_in_transition(edata_state_get(edata))); + assert(edata_state_get(edata) == eset->state + || edata_state_in_transition(edata_state_get(edata))); - size_t size = edata_size_get(edata); - size_t psz = sz_psz_quantize_floor(size); + size_t size = edata_size_get(edata); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (config_stats) { eset_stats_sub(eset, pind, size); @@ -136,8 +136,9 @@ eset_remove(eset_t *eset, edata_t *edata) { * summaries of the removed element and the min element should * compare equal. */ - if (edata_cmp_summary_comp(edata_cmp_summary, - eset->bins[pind].heap_min) == 0) { + if (edata_cmp_summary_comp( + edata_cmp_summary, eset->bins[pind].heap_min) + == 0) { eset->bins[pind].heap_min = edata_cmp_summary_get( edata_heap_first(&eset->bins[pind].heap)); } @@ -148,35 +149,35 @@ eset_remove(eset_t *eset, edata_t *edata) { * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. 
*/ - size_t cur_extents_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + size_t cur_extents_npages = atomic_load_zu( + &eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); - atomic_store_zu(&eset->npages, - cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), + ATOMIC_RELAXED); } static edata_t * -eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, - size_t alignment) { +eset_enumerate_alignment_search( + eset_t *eset, size_t size, pszind_t bin_ind, size_t alignment) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { return NULL; } - edata_t *edata = NULL; + edata_t *edata = NULL; edata_heap_enumerate_helper_t helper; edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, - ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); - while ((edata = - edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != - NULL) { + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + while ((edata = edata_heap_enumerate_next( + &eset->bins[bin_ind].heap, &helper)) + != NULL) { uintptr_t base = (uintptr_t)edata_base_get(edata); - size_t candidate_size = edata_size_get(edata); + size_t candidate_size = edata_size_get(edata); if (candidate_size < size) { continue; } - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); + uintptr_t next_align = ALIGNMENT_CEILING( + (uintptr_t)base, PAGE_CEILING(alignment)); if (base > next_align || base + candidate_size <= next_align) { /* Overflow or not crossing the next alignment. 
*/ continue; @@ -198,19 +199,20 @@ eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, return NULL; } - edata_t *ret = NULL, *edata = NULL; + edata_t *ret = NULL, *edata = NULL; edata_heap_enumerate_helper_t helper; edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, - ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); - while ((edata = - edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != - NULL) { - if ((!exact_only && edata_size_get(edata) >= size) || - (exact_only && edata_size_get(edata) == size)) { - edata_cmp_summary_t temp_summ = - edata_cmp_summary_get(edata); - if (ret == NULL || edata_cmp_summary_comp(temp_summ, - *ret_summ) < 0) { + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + while ((edata = edata_heap_enumerate_next( + &eset->bins[bin_ind].heap, &helper)) + != NULL) { + if ((!exact_only && edata_size_get(edata) >= size) + || (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = edata_cmp_summary_get( + edata); + if (ret == NULL + || edata_cmp_summary_comp(temp_summ, *ret_summ) + < 0) { ret = edata; *ret_summ = temp_summ; } @@ -225,8 +227,8 @@ eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, * requirement. For each size, try only the first extent in the heap. 
*/ static edata_t * -eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, - size_t alignment) { +eset_fit_alignment( + eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); @@ -234,26 +236,26 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); if (sz_large_size_classes_disabled() && pind != pind_prev) { edata_t *ret = NULL; - ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, - alignment); + ret = eset_enumerate_alignment_search( + eset, min_size, pind_prev, alignment); if (ret != NULL) { return ret; } } for (pszind_t i = - (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); - i < pind_max; - i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < pind_max; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < SC_NPSIZES); assert(!edata_heap_empty(&eset->bins[i].heap)); - edata_t *edata = edata_heap_first(&eset->bins[i].heap); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); uintptr_t base = (uintptr_t)edata_base_get(edata); - size_t candidate_size = edata_size_get(edata); + size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); + uintptr_t next_align = ALIGNMENT_CEILING( + (uintptr_t)base, PAGE_CEILING(alignment)); if (base > next_align || base + candidate_size <= next_align) { /* Overflow or not crossing the next alignment. */ continue; @@ -279,22 +281,23 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * for others. 
*/ static edata_t * -eset_first_fit(eset_t *eset, size_t size, bool exact_only, - unsigned lg_max_fit) { - edata_t *ret = NULL; +eset_first_fit( + eset_t *eset, size_t size, bool exact_only, unsigned lg_max_fit) { + edata_t *ret = NULL; edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0}); pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { if (sz_large_size_classes_disabled()) { - pszind_t pind_prev = - sz_psz2ind(sz_psz_quantize_floor(size)); + pszind_t pind_prev = sz_psz2ind( + sz_psz_quantize_floor(size)); return eset_enumerate_search(eset, size, pind_prev, /* exact_only */ true, &ret_summ); } else { - return edata_heap_empty(&eset->bins[pind].heap) ? NULL: - edata_heap_first(&eset->bins[pind].heap); + return edata_heap_empty(&eset->bins[pind].heap) + ? NULL + : edata_heap_first(&eset->bins[pind].heap); } } @@ -321,15 +324,15 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, * usize and thus should be enumerated. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_large_size_classes_disabled() && pind != pind_prev){ + if (sz_large_size_classes_disabled() && pind != pind_prev) { ret = eset_enumerate_search(eset, size, pind_prev, /* exact_only */ false, &ret_summ); } for (pszind_t i = - (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); - i < ESET_NPSIZES; - i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < ESET_NPSIZES; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->bins[i].heap)); if (lg_max_fit == SC_PTR_BITS) { /* @@ -342,8 +345,9 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } - if (ret == NULL || edata_cmp_summary_comp( - eset->bins[i].heap_min, ret_summ) < 0) { + if (ret == NULL + || edata_cmp_summary_comp(eset->bins[i].heap_min, ret_summ) + < 0) { /* * We grab the edata as early as 
possible, even though * we might change it later. Practically, a large @@ -354,9 +358,10 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, edata_t *edata = edata_heap_first(&eset->bins[i].heap); assert(edata_size_get(edata) >= size); assert(ret == NULL || edata_snad_comp(edata, ret) < 0); - assert(ret == NULL || edata_cmp_summary_comp( - eset->bins[i].heap_min, - edata_cmp_summary_get(edata)) == 0); + assert(ret == NULL + || edata_cmp_summary_comp(eset->bins[i].heap_min, + edata_cmp_summary_get(edata)) + == 0); ret = edata; ret_summ = eset->bins[i].heap_min; } diff --git a/src/extent.c b/src/extent.c index 03a3fdd8..0a23bbd9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,7 +19,7 @@ size_t opt_process_madvise_max_batch = #else 0 #endif - ; +; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -29,8 +29,8 @@ static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); -static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool holding_core_locks); +static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *a, edata_t *b, bool holding_core_locks); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -42,7 +42,7 @@ static atomic_zu_t highpages; * definition. 
*/ -static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); +static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, bool zero, bool *commit, bool growing_retained, bool guarded); @@ -51,8 +51,8 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); -static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); /******************************************************************************/ @@ -73,8 +73,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, - edata, &coalesced); + edata = extent_try_coalesce( + tsdn, pac, ehooks, ecache, edata, &coalesced); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { @@ -90,10 +90,10 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool guarded) { assert(size != 0); assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool commit = true; + bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); @@ -107,10 +107,10 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, 
ehooks_t *ehooks, ecache_t *ecache, bool guarded) { assert(size != 0); assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool commit = true; + bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, size, alignment, zero, &commit, guarded); if (edata == NULL) { @@ -131,10 +131,11 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, */ return NULL; } - void *new_addr = (expand_edata == NULL) ? NULL : - edata_past_get(expand_edata); - edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, - size, alignment, zero, &commit, + void *new_addr = (expand_edata == NULL) + ? NULL + : edata_past_get(expand_edata); + edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, + alignment, zero, &commit, /* growing_retained */ false); } @@ -148,8 +149,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); @@ -158,8 +159,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, size_t npages_min) { +ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -194,8 +195,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, break; } /* Try to coalesce. 
*/ - if (extent_try_delayed_coalesce(tsdn, pac, ehooks, ecache, - edata)) { + if (extent_try_delayed_coalesce( + tsdn, pac, ehooks, ecache, edata)) { break; } /* @@ -211,8 +212,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, switch (ecache->state) { case extent_state_dirty: case extent_state_muzzy: - emap_update_edata_state(tsdn, pac->emap, edata, - extent_state_active); + emap_update_edata_state( + tsdn, pac->emap, edata, extent_state_active); break; case extent_state_retained: extent_deregister(tsdn, pac, edata); @@ -238,16 +239,16 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&pac->stats->abandoned_vm, sz, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ if (ecache->state == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, ehooks, edata, 0, sz, - growing_retained)) { + if (extent_purge_lazy_impl( + tsdn, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained); } @@ -256,20 +257,20 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked_impl(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - edata_t *edata) { +extent_deactivate_locked_impl( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); - eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset : - &ecache->eset; + eset_t *eset = edata_guarded_get(edata) ? 
&ecache->guarded_eset + : &ecache->eset; eset_insert(eset, edata); } static void -extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - edata_t *edata) { +extent_deactivate_locked( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); extent_deactivate_locked_impl(tsdn, pac, ecache, edata); } @@ -282,11 +283,11 @@ extent_deactivate_check_state_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, } static void -extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, - edata_t *edata) { +extent_activate_locked( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == ecache->state || - edata_state_get(edata) == extent_state_merging); + assert(edata_state_get(edata) == ecache->state + || edata_state_get(edata) == extent_state_merging); eset_remove(eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); @@ -296,16 +297,18 @@ void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. 
*/ - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (opt_prof && edata_state_get(edata) == extent_state_active) { size_t nadd = edata_size_get(edata) >> LG_PAGE; - size_t cur = atomic_fetch_add_zu(&curpages, nadd, - ATOMIC_RELAXED) + nadd; + size_t cur = atomic_fetch_add_zu( + &curpages, nadd, ATOMIC_RELAXED) + + nadd; size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); - while (cur > high && !atomic_compare_exchange_weak_zu( - &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { + while (cur > high + && !atomic_compare_exchange_weak_zu(&highpages, &high, cur, + ATOMIC_RELAXED, ATOMIC_RELAXED)) { /* * Don't refresh cur, because it may have decreased * since this thread lost the highpages update race. @@ -337,7 +340,7 @@ extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { * prevents other threads from accessing the edata. */ if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, - /* slab */ false)) { + /* slab */ false)) { return true; } @@ -368,8 +371,7 @@ extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, - bool gdump) { +extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { emap_deregister_boundary(tsdn, pac->emap, edata); if (config_prof && gdump) { @@ -383,8 +385,7 @@ extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, - edata_t *edata) { +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { extent_deregister_impl(tsdn, pac, edata, false); } @@ -411,7 +412,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t *edata; - eset_t *eset = guarded ? 
&ecache->guarded_eset : &ecache->eset; + eset_t *eset = guarded ? &ecache->guarded_eset : &ecache->eset; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); @@ -419,8 +420,8 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { - emap_release_edata(tsdn, pac->emap, edata, - ecache->state); + emap_release_edata( + tsdn, pac->emap, edata, ecache->state); edata = NULL; } } @@ -435,7 +436,8 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * put a cap on how big an extent we can split for a request. */ unsigned lg_max_fit = ecache->delay_coalesce - ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; + ? (unsigned)opt_lg_extent_max_active_fit + : SC_PTR_BITS; /* * If split and merge are not allowed (Windows w/o retain), try @@ -446,8 +448,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * allocations. */ bool exact_only = (!maps_coalesce && !opt_retain) || guarded; - edata = eset_fit(eset, size, alignment, exact_only, - lg_max_fit); + edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); } if (edata == NULL) { return NULL; @@ -489,10 +490,11 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ - edata_t **to_leak, edata_t **to_salvage, - edata_t *expand_edata, size_t size, size_t alignment) { + edata_t **to_leak, edata_t **to_salvage, edata_t *expand_edata, size_t size, + size_t alignment) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), - PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); + PAGE_CEILING(alignment)) + - (uintptr_t)edata_base_get(*edata); assert(expand_edata == NULL || leadsize == 0); if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; @@ -547,14 +549,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(!edata_guarded_get(edata) || size == edata_size_get(edata)); malloc_mutex_assert_owner(tsdn, &ecache->mtx); - edata_t *lead; - edata_t *trail; - edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *lead; + edata_t *trail; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); - extent_split_interior_result_t result = extent_split_interior( - tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - expand_edata, size, alignment); + extent_split_interior_result_t result = extent_split_interior(tsdn, pac, + ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, expand_edata, + size, alignment); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -615,8 +617,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, malloc_mutex_lock(tsdn, &ecache->mtx); - edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment, guarded); + edata_t *edata = extent_recycle_extract( + tsdn, pac, ehooks, ecache, expand_edata, size, alignment, guarded); if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; @@ -630,8 +632,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } assert(edata_state_get(edata) == extent_state_active); - if (extent_commit_zero(tsdn, 
ehooks, edata, *commit, zero, - growing_retained)) { + if (extent_commit_zero( + tsdn, ehooks, edata, *commit, zero, growing_retained)) { extent_record(tsdn, pac, ehooks, ecache, edata); return NULL; } @@ -660,16 +662,16 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, * be within the range of [0, 2 * (HUGEPAGE - 1)]. */ void *huge_addr = HUGEPAGE_ADDR2BASE(addr); - void *huge_end = HUGEPAGE_ADDR2BASE((void *)((byte_t *)addr + - (uintptr_t)(size + HUGEPAGE - 1))); + void *huge_end = HUGEPAGE_ADDR2BASE( + (void *)((byte_t *)addr + (uintptr_t)(size + HUGEPAGE - 1))); assert((uintptr_t)huge_end > (uintptr_t)huge_addr); size_t huge_size = (uintptr_t)huge_end - (uintptr_t)huge_addr; - assert(huge_size <= (size + ((HUGEPAGE - 1) << 1)) && - huge_size >= size); + assert( + huge_size <= (size + ((HUGEPAGE - 1) << 1)) && huge_size >= size); - if (opt_metadata_thp == metadata_thp_always || - pac_thp->auto_thp_switched) { + if (opt_metadata_thp == metadata_thp_always + || pac_thp->auto_thp_switched) { pages_huge(huge_addr, huge_size); } else { assert(opt_metadata_thp == metadata_thp_auto); @@ -687,8 +689,10 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, if (edata != NULL) { edata_addr_set(edata, huge_addr); edata_size_set(edata, huge_size); - edata_list_active_append(&pac_thp->thp_lazy_list, edata); - atomic_fetch_add_u(&pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); + edata_list_active_append( + &pac_thp->thp_lazy_list, edata); + atomic_fetch_add_u( + &pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); } malloc_mutex_unlock(tsdn, &pac_thp->lock); } @@ -702,8 +706,8 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, * virtual memory ranges retained by each shard. 
*/ static edata_t * -extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - size_t size, size_t alignment, bool zero, bool *commit) { +extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &pac->grow_mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -715,10 +719,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Find the next extent size in the series that would be large enough to * satisfy this request. */ - size_t alloc_size; + size_t alloc_size; pszind_t exp_grow_skip; - bool err = exp_grow_size_prepare(&pac->exp_grow, alloc_size_min, - &alloc_size, &exp_grow_skip); + bool err = exp_grow_size_prepare( + &pac->exp_grow, alloc_size_min, &alloc_size, &exp_grow_skip); if (err) { goto label_err; } @@ -730,8 +734,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zeroed = false; bool committed = false; - void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed); + void *ptr = ehooks_alloc( + tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed); if (ptr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -752,23 +756,23 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, *commit = true; } - edata_t *lead; - edata_t *trail; - edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *lead; + edata_t *trail; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); - extent_split_interior_result_t result = extent_split_interior(tsdn, - pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment); + extent_split_interior_result_t result = extent_split_interior(tsdn, pac, + ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, + alignment); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, pac, ehooks, 
&pac->ecache_retained, - lead); + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, lead); } if (trail != NULL) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - trail); + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, trail); } } else { /* @@ -792,15 +796,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), true)) { - extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, edata); + if (extent_commit_impl( + tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, edata); goto label_err; } /* A successful commit should return zeroed memory. */ if (config_debug) { - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t *p = (size_t *)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -819,8 +823,9 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (huge_arena_pac_thp.thp_madvise) { /* Avoid using HUGEPAGE when the grow size is less than HUGEPAGE. 
*/ - if (ind != 0 && ind == huge_arena_ind && ehooks_are_default(ehooks) && - likely(alloc_size >= HUGEPAGE)) { + if (ind != 0 && ind == huge_arena_ind + && ehooks_are_default(ehooks) + && likely(alloc_size >= HUGEPAGE)) { extent_handle_huge_arena_thp(tsdn, &huge_arena_pac_thp, pac->edata_cache, ptr, alloc_size); } @@ -831,8 +836,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } if (zero && !edata_zeroed_get(edata)) { - ehooks_zero(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata)); + ehooks_zero( + tsdn, ehooks, edata_base_get(edata), edata_size_get(edata)); } return edata; label_err: @@ -858,8 +863,8 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } } else if (opt_retain && expand_edata == NULL && !guarded) { - edata = extent_grow_retained(tsdn, pac, ehooks, size, - alignment, zero, commit); + edata = extent_grow_retained( + tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. */ } else { malloc_mutex_unlock(tsdn, &pac->grow_mtx); @@ -875,12 +880,12 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, extent_assert_can_coalesce(inner, outer); eset_remove(&ecache->eset, outer); - bool err = extent_merge_impl(tsdn, pac, ehooks, - forward ? inner : outer, forward ? outer : inner, + bool err = extent_merge_impl(tsdn, pac, ehooks, forward ? inner : outer, + forward ? outer : inner, /* holding_core_locks */ true); if (err) { - extent_deactivate_check_state_locked(tsdn, pac, ecache, outer, - extent_state_merging); + extent_deactivate_check_state_locked( + tsdn, pac, ecache, outer, extent_state_merging); } return err; @@ -908,10 +913,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce forward. 
*/ edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); - size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + size_t max_next_neighbor = max_size > edata_size_get(edata) + ? max_size - edata_size_get(edata) + : 0; if (next != NULL && edata_size_get(next) <= max_next_neighbor) { - if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - next, true)) { + if (!extent_coalesce( + tsdn, pac, ehooks, ecache, edata, next, true)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -924,10 +931,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce backward. */ edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); - size_t max_prev_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + size_t max_prev_neighbor = max_size > edata_size_get(edata) + ? max_size - edata_size_get(edata) + : 0; if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false)) { + prev, false)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -948,36 +957,33 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { - return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - SC_LARGE_MAXCLASS, coalesced); + return extent_try_coalesce_impl( + tsdn, pac, ehooks, ecache, edata, SC_LARGE_MAXCLASS, coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { - return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - max_size, coalesced); + return extent_try_coalesce_impl( + tsdn, pac, ehooks, ecache, edata, max_size, coalesced); } /* Purge a single extent to retained / unmapped directly. */ static void -extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_maximally_purge( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); extent_dalloc_wrapper(tsdn, pac, ehooks, edata); if (config_stats) { /* Update stats accordingly. 
*/ LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*pac->stats_mtx), + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), &pac->stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*pac->stats_mtx), - &pac->stats->decay_dirty.purged, - extent_size >> LG_PAGE); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.purged, extent_size >> LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); - atomic_fetch_sub_zu(&pac->stats->pac_mapped, extent_size, - ATOMIC_RELAXED); + atomic_fetch_sub_zu( + &pac->stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } @@ -988,9 +994,9 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - assert((ecache->state != extent_state_dirty && - ecache->state != extent_state_muzzy) || - !edata_zeroed_get(edata)); + assert((ecache->state != extent_state_dirty + && ecache->state != extent_state_muzzy) + || !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1001,8 +1007,8 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } if (!ecache->delay_coalesce) { bool coalesced_unused; - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - &coalesced_unused); + edata = extent_try_coalesce( + tsdn, pac, ehooks, ecache, edata, &coalesced_unused); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ @@ -1027,17 +1033,21 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, * the final coalescing that happens during the transition from dirty ecache * to muzzy/retained ecache states. 
*/ - unsigned lg_max_coalesce = (unsigned)opt_lg_extent_max_active_fit; + unsigned lg_max_coalesce = (unsigned) + opt_lg_extent_max_active_fit; size_t edata_size = edata_size_get(edata); - size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) > edata_size ? (edata_size << lg_max_coalesce) : SC_LARGE_MAXCLASS; - bool coalesced; + size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) + > edata_size + ? (edata_size << lg_max_coalesce) + : SC_LARGE_MAXCLASS; + bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, max_size, &coalesced); } while (coalesced); - if (edata_size_get(edata) >= - atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + if (edata_size_get(edata) >= atomic_load_zu( + &pac->oversize_threshold, ATOMIC_RELAXED) && !background_thread_enabled() && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. */ @@ -1053,10 +1063,9 @@ label_skip_coalesce: } void -extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); +extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -1066,14 +1075,14 @@ extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_try( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { bool err; assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_addr_set(edata, 
edata_base_get(edata)); @@ -1089,8 +1098,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, + size_t size, size_t alignment, bool zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1100,14 +1109,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - &zero, commit); + void *addr = ehooks_alloc( + tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, + /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, EXTENT_PAI_PAC, opt_retain ? 
EXTENT_IS_HEAD : EXTENT_NOT_HEAD); /* @@ -1125,8 +1134,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } static void -extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_finish( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { if (config_prof) { extent_gdump_sub(tsdn, edata); } @@ -1134,11 +1143,11 @@ extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_purged( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Verify that will not go down the dalloc / munmap route. */ assert(ehooks_dalloc_will_fail(ehooks)); @@ -1148,19 +1157,19 @@ extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { /* Remove guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - san_unguard_pages_two_sided(tsdn, ehooks, edata, - pac->emap); + san_unguard_pages_two_sided( + tsdn, ehooks, edata, pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1177,15 +1186,15 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zeroed; if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata))) { + } else if (!extent_decommit_wrapper( + tsdn, ehooks, edata, 0, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = true; - } else if (edata_state_get(edata) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { + } else if (edata_state_get(edata) == extent_state_muzzy + || !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = false; } else { zeroed = false; @@ -1196,15 +1205,15 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_destroy_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); extent_state_t state = edata_state_get(edata); assert(state == extent_state_retained || state == extent_state_active); assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { assert(opt_retain); @@ -1240,8 +1249,8 @@ extent_commit_wrapper(tsdn_t *tsdn, 
ehooks_t *ehooks, edata_t *edata, static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) && err); @@ -1261,8 +1270,8 @@ extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, ehooks, edata, offset, - length, false); + return extent_purge_lazy_impl( + tsdn, ehooks, edata, offset, length, false); } static bool @@ -1278,8 +1287,8 @@ extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, ehooks, edata, offset, length, - false); + return extent_purge_forced_impl( + tsdn, ehooks, edata, offset, length, false); } /* @@ -1290,16 +1299,16 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { +extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, + size_t size_a, size_t size_b, bool holding_core_locks) { assert(edata_size_get(edata) == size_a + size_b); /* Only the shrink path may split w/o holding core locks. 
*/ if (holding_core_locks) { witness_assert_positive_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); } else { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } if (ehooks_split_will_fail(ehooks)) { @@ -1317,8 +1326,8 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, - size_a, trail, size_b); + bool err = emap_split_prepare( + tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } @@ -1340,8 +1349,8 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, - size_b); + emap_split_commit( + tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); return trail; label_error_b: @@ -1353,8 +1362,8 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { - return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - holding_core_locks); + return extent_split_impl( + tsdn, pac, ehooks, edata, size_a, size_b, holding_core_locks); } static bool @@ -1365,8 +1374,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, witness_assert_positive_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); } else { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } assert(edata_base_get(a) < edata_base_get(b)); @@ -1391,12 +1400,13 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_prepare_t prepare; 
emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - assert(edata_state_get(a) == extent_state_active || - edata_state_get(a) == extent_state_merging); + assert(edata_state_get(a) == extent_state_active + || edata_state_get(a) == extent_state_merging); edata_state_set(a, extent_state_active); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? - edata_sn_get(a) : edata_sn_get(b)); + edata_sn_set(a, + (edata_sn_get(a) < edata_sn_get(b)) ? edata_sn_get(a) + : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); emap_merge_commit(tsdn, pac->emap, &prepare, a, b); @@ -1407,26 +1417,26 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, } bool -extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b) { +extent_merge_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { return extent_merge_impl(tsdn, pac, ehooks, a, b, /* holding_core_locks */ false); } bool -extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - bool commit, bool zero, bool growing_retained) { +extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool commit, + bool zero, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); if (commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), growing_retained)) { + edata_size_get(edata), growing_retained)) { return true; } } if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); + void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); } diff --git a/src/extent_dss.c b/src/extent_dss.c index 32fb4112..3f7a15d0 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -11,14 +11,10 @@ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define SBRK_INVALID ((void *)-1) -const char *opt_dss = DSS_DEFAULT; +const char *opt_dss = DSS_DEFAULT; -const char *const dss_prec_names[] = { - "disabled", - "primary", - "secondary", - "N/A" -}; +const char *const dss_prec_names[] = { + "disabled", "primary", "secondary", "N/A"}; /* * Current dss precedence default, used when creating new arenas. NB: This is @@ -26,17 +22,16 @@ const char *const dss_prec_names[] = { * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use * atomic operations to synchronize the setting. */ -static atomic_u_t dss_prec_default = ATOMIC_INIT( - (unsigned)DSS_PREC_DEFAULT); +static atomic_u_t dss_prec_default = ATOMIC_INIT((unsigned)DSS_PREC_DEFAULT); /* Base address of the DSS. */ -static void *dss_base; +static void *dss_base; /* Atomic boolean indicating whether a thread is currently extending DSS. */ -static atomic_b_t dss_extending; +static atomic_b_t dss_extending; /* Atomic boolean indicating whether the DSS is exhausted. */ -static atomic_b_t dss_exhausted; +static atomic_b_t dss_exhausted; /* Atomic current upper limit on DSS addresses. 
*/ -static atomic_p_t dss_max; +static atomic_p_t dss_max; /******************************************************************************/ @@ -76,7 +71,7 @@ extent_dss_extending_start(void) { while (true) { bool expected = false; if (atomic_compare_exchange_weak_b(&dss_extending, &expected, - true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { + true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { break; } spin_adaptive(&spinner); @@ -143,24 +138,24 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; } - bool head_state = opt_retain ? EXTENT_IS_HEAD : - EXTENT_NOT_HEAD; + bool head_state = opt_retain ? EXTENT_IS_HEAD + : EXTENT_NOT_HEAD; /* * Compute how much page-aligned gap space (if any) is * necessary to satisfy alignment. This space can be * recycled for later use. */ - void *gap_addr_page = ALIGNMENT_ADDR2CEILING(max_cur, - PAGE); + void *gap_addr_page = ALIGNMENT_ADDR2CEILING( + max_cur, PAGE); void *ret = ALIGNMENT_ADDR2CEILING( gap_addr_page, alignment); - size_t gap_size_page = (uintptr_t)ret - - (uintptr_t)gap_addr_page; + size_t gap_size_page = (uintptr_t)ret + - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, extent_sn_next( - &arena->pa_shard.pac), + SC_NSIZES, + extent_sn_next(&arena->pa_shard.pac), extent_state_active, false, true, EXTENT_PAI_PAC, head_state); } @@ -169,25 +164,25 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * allocation space. */ void *dss_next = (void *)((byte_t *)ret + size); - if ((uintptr_t)ret < (uintptr_t)max_cur || - (uintptr_t)dss_next < (uintptr_t)max_cur) { + if ((uintptr_t)ret < (uintptr_t)max_cur + || (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. */ } /* Compute the increment, including subpage bytes. 
*/ - void *gap_addr_subpage = max_cur; - size_t gap_size_subpage = (uintptr_t)ret - - (uintptr_t)gap_addr_subpage; + void *gap_addr_subpage = max_cur; + size_t gap_size_subpage = (uintptr_t)ret + - (uintptr_t)gap_addr_subpage; intptr_t incr = gap_size_subpage + size; - assert((uintptr_t)max_cur + incr == (uintptr_t)ret + - size); + assert( + (uintptr_t)max_cur + incr == (uintptr_t)ret + size); /* Try to allocate. */ void *dss_prev = extent_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ - atomic_store_p(&dss_max, dss_next, - ATOMIC_RELEASE); + atomic_store_p( + &dss_max, dss_next, ATOMIC_RELEASE); extent_dss_extending_finish(); if (gap_size_page != 0) { @@ -203,17 +198,16 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - edata_t edata = {0}; + edata_t edata = {0}; ehooks_t *ehooks = arena_get_ehooks( arena); - edata_init(&edata, - arena_ind_get(arena), ret, size, - size, false, SC_NSIZES, + edata_init(&edata, arena_ind_get(arena), + ret, size, size, false, SC_NSIZES, extent_state_active, false, true, EXTENT_PAI_PAC, head_state); if (extent_purge_forced_wrapper(tsdn, - ehooks, &edata, 0, size)) { + ehooks, &edata, 0, size)) { memset(ret, 0, size); } } @@ -225,8 +219,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, */ if (dss_prev == SBRK_INVALID) { /* OOM. 
*/ - atomic_store_b(&dss_exhausted, true, - ATOMIC_RELEASE); + atomic_store_b( + &dss_exhausted, true, ATOMIC_RELEASE); goto label_oom; } } @@ -239,16 +233,16 @@ label_oom: static bool extent_in_dss_helper(void *addr, void *max) { - return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr < - (uintptr_t)max); + return ((uintptr_t)addr >= (uintptr_t)dss_base + && (uintptr_t)addr < (uintptr_t)max); } bool extent_in_dss(void *addr) { cassert(have_dss); - return extent_in_dss_helper(addr, atomic_load_p(&dss_max, - ATOMIC_ACQUIRE)); + return extent_in_dss_helper( + addr, atomic_load_p(&dss_max, ATOMIC_ACQUIRE)); } bool @@ -257,14 +251,14 @@ extent_dss_mergeable(void *addr_a, void *addr_b) { cassert(have_dss); - if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b < - (uintptr_t)dss_base) { + if ((uintptr_t)addr_a < (uintptr_t)dss_base + && (uintptr_t)addr_b < (uintptr_t)dss_base) { return true; } max = atomic_load_p(&dss_max, ATOMIC_ACQUIRE); - return (extent_in_dss_helper(addr_a, max) == - extent_in_dss_helper(addr_b, max)); + return (extent_in_dss_helper(addr_a, max) + == extent_in_dss_helper(addr_b, max)); } void @@ -273,7 +267,8 @@ extent_dss_boot(void) { dss_base = extent_dss_sbrk(0); atomic_store_b(&dss_extending, false, ATOMIC_RELAXED); - atomic_store_b(&dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); + atomic_store_b( + &dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 5f0ee2d2..d39bddc6 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -7,7 +7,7 @@ /******************************************************************************/ /* Data. 
*/ -bool opt_retain = +bool opt_retain = #ifdef JEMALLOC_RETAIN true #else @@ -18,8 +18,8 @@ bool opt_retain = /******************************************************************************/ void * -extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit) { +extent_alloc_mmap( + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); void *ret = pages_map(new_addr, size, alignment, commit); if (ret == NULL) { diff --git a/src/fxp.c b/src/fxp.c index 96585f0a..faeab207 100644 --- a/src/fxp.c +++ b/src/fxp.c @@ -83,8 +83,8 @@ fxp_parse(fxp_t *result, const char *str, char **end) { } assert(fractional_part < frac_div); - uint32_t fractional_repr = (uint32_t)( - (fractional_part << 16) / frac_div); + uint32_t fractional_repr = (uint32_t)((fractional_part << 16) + / frac_div); /* Success! */ *result = (integer_part << 16) + fractional_repr; @@ -99,7 +99,7 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { uint32_t integer_part = fxp_round_down(a); uint32_t fractional_part = (a & ((1U << 16) - 1)); - int leading_fraction_zeros = 0; + int leading_fraction_zeros = 0; uint64_t fraction_digits = fractional_part; for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { if (fraction_digits < (1U << 16) @@ -113,12 +113,12 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { fraction_digits /= 10; } - size_t printed = malloc_snprintf(buf, FXP_BUF_SIZE, "%"FMTu32".", - integer_part); + size_t printed = malloc_snprintf( + buf, FXP_BUF_SIZE, "%" FMTu32 ".", integer_part); for (int i = 0; i < leading_fraction_zeros; i++) { buf[printed] = '0'; printed++; } - malloc_snprintf(&buf[printed], FXP_BUF_SIZE - printed, "%"FMTu64, - fraction_digits); + malloc_snprintf( + &buf[printed], FXP_BUF_SIZE - printed, "%" FMTu64, fraction_digits); } diff --git a/src/hook.c b/src/hook.c index 77a988d7..4270ad60 100644 --- a/src/hook.c +++ b/src/hook.c @@ -9,19 +9,19 @@ typedef struct hooks_internal_s 
hooks_internal_t; struct hooks_internal_s { hooks_t hooks; - bool in_use; + bool in_use; }; seq_define(hooks_internal_t, hooks) -static atomic_u_t nhooks = ATOMIC_INIT(0); -static seq_hooks_t hooks[HOOK_MAX]; + static atomic_u_t nhooks = ATOMIC_INIT(0); +static seq_hooks_t hooks[HOOK_MAX]; static malloc_mutex_t hooks_mu; bool hook_boot(void) { - return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, - malloc_mutex_rank_exclusive); + return malloc_mutex_init( + &hooks_mu, "hooks", WITNESS_RANK_HOOK, malloc_mutex_rank_exclusive); } static void * @@ -84,20 +84,18 @@ hook_remove(tsdn_t *tsdn, void *opaque) { malloc_mutex_unlock(tsdn, &hooks_mu); } -#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ -for (int for_each_hook_counter = 0; \ - for_each_hook_counter < HOOK_MAX; \ - for_each_hook_counter++) { \ - bool for_each_hook_success = seq_try_load_hooks( \ - (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ - if (!for_each_hook_success) { \ - continue; \ - } \ - if (!(hooks_internal_ptr)->in_use) { \ - continue; \ - } -#define FOR_EACH_HOOK_END \ -} +#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ + for (int for_each_hook_counter = 0; for_each_hook_counter < HOOK_MAX; \ + for_each_hook_counter++) { \ + bool for_each_hook_success = seq_try_load_hooks( \ + (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ + if (!for_each_hook_success) { \ + continue; \ + } \ + if (!(hooks_internal_ptr)->in_use) { \ + continue; \ + } +#define FOR_EACH_HOOK_END } static bool * hook_reentrantp(void) { @@ -129,26 +127,25 @@ hook_reentrantp(void) { * untouched. 
*/ static bool in_hook_global = true; - tsdn_t *tsdn = tsdn_fetch(); - bool *in_hook = tsdn_in_hookp_get(tsdn); - if (in_hook!= NULL) { + tsdn_t *tsdn = tsdn_fetch(); + bool *in_hook = tsdn_in_hookp_get(tsdn); + if (in_hook != NULL) { return in_hook; } return &in_hook_global; } -#define HOOK_PROLOGUE \ - if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ - return; \ - } \ - bool *in_hook = hook_reentrantp(); \ - if (*in_hook) { \ - return; \ - } \ +#define HOOK_PROLOGUE \ + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ + return; \ + } \ + bool *in_hook = hook_reentrantp(); \ + if (*in_hook) { \ + return; \ + } \ *in_hook = true; -#define HOOK_EPILOGUE \ - *in_hook = false; +#define HOOK_EPILOGUE *in_hook = false; void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, @@ -157,10 +154,10 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_alloc h = hook.hooks.alloc_hook; - if (h != NULL) { - h(hook.hooks.extra, type, result, result_raw, args_raw); - } + hook_alloc h = hook.hooks.alloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, result, result_raw, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE @@ -171,10 +168,10 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_dalloc h = hook.hooks.dalloc_hook; - if (h != NULL) { - h(hook.hooks.extra, type, address, args_raw); - } + hook_dalloc h = hook.hooks.dalloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE } @@ -185,11 +182,11 @@ hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_expand h = hook.hooks.expand_hook; - if (h != NULL) { - h(hook.hooks.extra, type, address, old_usize, new_usize, - result_raw, args_raw); - } + hook_expand h = 
hook.hooks.expand_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, old_usize, new_usize, + result_raw, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE } diff --git a/src/hpa.c b/src/hpa.c index 48e356c6..03668f06 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -12,17 +12,17 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); -static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); -static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); +static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); +static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void hpa_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool @@ -70,7 +70,8 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { } bool -hpa_central_init(hpa_central_t *central, base_t 
*base, const hpa_hooks_t *hooks) { +hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -89,8 +90,8 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) static hpdata_t * hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { - return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t), - CACHELINE); + return (hpdata_t *)base_alloc( + tsdn, central->base, sizeof(hpdata_t), CACHELINE); } static hpdata_t * @@ -137,8 +138,8 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, */ bool commit = true; /* Allocate address space, bailing if we fail. */ - void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, - &commit); + void *new_eden = pages_map( + NULL, HPA_EDEN_SIZE, HUGEPAGE, &commit); if (new_eden == NULL) { *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -243,8 +244,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, * locking here. 
*/ static void -hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, - hpa_shard_nonderived_stats_t *src) { +hpa_shard_nonderived_stats_accum( + hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; @@ -255,13 +256,13 @@ hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); - hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, - &src->nonderived_stats); + hpa_shard_nonderived_stats_accum( + &dst->nonderived_stats, &src->nonderived_stats); } void -hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_shard_stats_t *dst) { +hpa_shard_stats_merge( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { hpa_do_consistency_checks(shard); malloc_mutex_lock(tsdn, &shard->grow_mtx); @@ -295,8 +296,8 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - return fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); + return fxp_mul_frac( + psset_nactive(&shard->psset), shard->opts.dirty_mult); } static bool @@ -307,7 +308,8 @@ hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { return false; } return hpa_adjusted_ndirty(tsdn, shard) - + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard); + + hpdata_nretained_get(to_hugify) + > hpa_ndirty_max(tsdn, shard); } static bool @@ -323,8 +325,8 @@ hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } static void -hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, - hpdata_t *ps) { +hpa_update_purge_hugify_eligibility( + tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { hpdata_purge_allowed_set(ps, false); @@ -397,7 +399,7 @@ 
hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { #define HPA_PURGE_BATCH_MAX_DEFAULT 16 #ifndef JEMALLOC_JET -#define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT +# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT #else size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; size_t @@ -406,20 +408,21 @@ hpa_purge_max_batch_size_for_test_set(size_t new_size) { hpa_purge_max_batch_size_for_test = new_size; return old_size; } -#define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test +# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test #endif static inline size_t hpa_process_madvise_max_iovec_len(void) { - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); - return opt_process_madvise_max_batch == 0 ? - HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 + ? HPA_MIN_VAR_VEC_SIZE + : opt_process_madvise_max_batch; } static inline void -hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, - size_t batch_sz) { +hpa_purge_actual_unlocked( + hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) { assert(batch_sz > 0); size_t len = hpa_process_madvise_max_iovec_len(); @@ -433,17 +436,18 @@ hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, /* Actually do the purging, now that the lock is dropped. 
*/ if (batch[i].dehugify) { - shard->central->hooks.dehugify(hpdata_addr_get(to_purge), - HUGEPAGE); + shard->central->hooks.dehugify( + hpdata_addr_get(to_purge), HUGEPAGE); } - void *purge_addr; + void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + to_purge, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); - hpa_range_accum_add(&accum, purge_addr, purge_size, shard); + hpa_range_accum_add( + &accum, purge_addr, purge_size, shard); } } hpa_range_accum_finish(&accum, shard); @@ -490,10 +494,10 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { /* Gather all the metadata we'll need during the purge. */ hp_item->dehugify = hpdata_huge_get(hp_item->hp); size_t nranges; - size_t ndirty = - hpdata_purge_begin(hp_item->hp, &hp_item->state, &nranges); + size_t ndirty = hpdata_purge_begin( + hp_item->hp, &hp_item->state, &nranges); /* We picked hp to purge, so it should have some dirty ranges */ - assert(ndirty > 0 && nranges >0); + assert(ndirty > 0 && nranges > 0); b->ndirty_in_batch += ndirty; b->nranges += nranges; return ndirty; @@ -501,8 +505,8 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { /* Finish purge of one huge page. 
*/ static inline void -hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_purge_item_t *hp_item) { +hpa_purge_finish_hp( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_purge_item_t *hp_item) { if (hp_item->dehugify) { shard->stats.ndehugifies++; } @@ -523,9 +527,9 @@ hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, static inline bool hpa_batch_full(hpa_purge_batch_t *b) { /* It's okay for ranges to go above */ - return b->npurged_hp_total == b->max_hp || - b->item_cnt == b->items_capacity || - b->nranges >= b->range_watermark; + return b->npurged_hp_total == b->max_hp + || b->item_cnt == b->items_capacity + || b->nranges >= b->range_watermark; } static inline void @@ -547,23 +551,25 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { assert(max_hp > 0); assert(HPA_PURGE_BATCH_MAX > 0); - assert(HPA_PURGE_BATCH_MAX < - (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t))); + assert(HPA_PURGE_BATCH_MAX + < (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t))); VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX); hpa_purge_batch_t batch = { - .max_hp = max_hp, - .npurged_hp_total = 0, - .items = &items[0], - .items_capacity = HPA_PURGE_BATCH_MAX, - .range_watermark = hpa_process_madvise_max_iovec_len(), + .max_hp = max_hp, + .npurged_hp_total = 0, + .items = &items[0], + .items_capacity = HPA_PURGE_BATCH_MAX, + .range_watermark = hpa_process_madvise_max_iovec_len(), }; assert(batch.range_watermark > 0); while (1) { hpa_batch_pass_start(&batch); assert(hpa_batch_empty(&batch)); - while(!hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { - size_t ndirty = hpa_purge_start_hp(&batch, &shard->psset); + while ( + !hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { + size_t ndirty = hpa_purge_start_hp( + &batch, &shard->psset); if (ndirty == 0) { break; } @@ -582,8 +588,8 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { shard->npending_purge -= batch.ndirty_in_batch; shard->stats.npurges += batch.ndirty_in_batch; 
shard->central->hooks.curtime(&shard->last_purge, - /* first_reading */ false); - for (size_t i=0; imtx); - bool err = shard->central->hooks.hugify(hpdata_addr_get(to_hugify), - HUGEPAGE, shard->opts.hugify_sync); + bool err = shard->central->hooks.hugify( + hpdata_addr_get(to_hugify), HUGEPAGE, shard->opts.hugify_sync); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -669,8 +675,8 @@ hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { * hpa_shard_do_deferred_work() call. */ static void -hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, - bool forced) { +hpa_shard_maybe_do_deferred_work( + tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!forced && shard->opts.deferral_allowed) { return; @@ -704,8 +710,7 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * of purging algorithm. */ ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; - if (max_purge_nhp != -1 && - max_purges > (size_t)max_purge_nhp) { + if (max_purge_nhp != -1 && max_purges > (size_t)max_purge_nhp) { max_purges = max_purge_nhp; } @@ -725,9 +730,9 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, } static edata_t * -hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom) { - bool err; +hpa_try_alloc_one_no_grow( + tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + bool err; edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { *oom = true; @@ -754,8 +759,8 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } void *addr = hpdata_reserve_alloc(ps, size); - edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, + edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, + /* sn */ hpdata_age_get(ps), extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, 
EXTENT_NOT_HEAD); edata_ps_set(edata, ps); @@ -768,11 +773,11 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * dropped. This would force us to deal with a pageslab eviction down * the error pathway, which is a pain. */ - err = emap_register_boundary(tsdn, shard->emap, edata, - SC_NSIZES, /* slab */ false); + err = emap_register_boundary( + tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_unreserve(ps, edata_addr_get(edata), - edata_size_get(edata)); + hpdata_unreserve( + ps, edata_addr_get(edata), edata_size_get(edata)); /* * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a @@ -800,8 +805,8 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, malloc_mutex_lock(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { - edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size, - oom); + edata_t *edata = hpa_try_alloc_one_no_grow( + tsdn, shard, size, oom); if (edata == NULL) { break; } @@ -819,12 +824,11 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { assert(size <= HUGEPAGE); - assert(size <= shard->opts.slab_max_alloc || - size == sz_s2u(size)); + assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size)); bool oom = false; - size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs, results, deferred_work_generated); + size_t nsuccess = hpa_try_alloc_batch_no_grow( + tsdn, shard, size, &oom, nallocs, results, deferred_work_generated); if (nsuccess == nallocs || oom) { return nsuccess; @@ -851,8 +855,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. 
*/ - hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, - shard->age_counter++, &oom); + hpdata_t *ps = hpa_central_extract( + tsdn, shard->central, size, shard->age_counter++, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -894,8 +898,8 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); hpa_shard_t *shard = hpa_from_pai(self); /* @@ -908,16 +912,16 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, * huge page size). These requests do not concern internal * fragmentation with huge pages (again, the full size will be used). */ - if (!(frequent_reuse && size <= HUGEPAGE) && - (size > shard->opts.slab_max_alloc)) { + if (!(frequent_reuse && size <= HUGEPAGE) + && (size > shard->opts.slab_max_alloc)) { return 0; } - size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, - results, deferred_work_generated); + size_t nsuccess = hpa_alloc_batch_psset( + tsdn, shard, size, nallocs, results, deferred_work_generated); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* * Guard the sanity checks with config_debug because the loop cannot be @@ -926,13 +930,13 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, */ if (config_debug) { edata_t *edata; - ql_foreach(edata, &results->head, ql_link_active) { + ql_foreach (edata, &results->head, ql_link_active) { emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); - 
assert(edata_szind_get_maybe_invalid(edata) == - SC_NSIZES); + assert( + edata_szind_get_maybe_invalid(edata) == SC_NSIZES); assert(!edata_slab_get(edata)); assert(edata_committed_get(edata)); assert(edata_base_get(edata) == edata_addr_get(edata)); @@ -947,8 +951,8 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* We don't handle alignment or zeroing for now. */ if (alignment > PAGE || zero) { @@ -975,8 +979,8 @@ hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } static bool -hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated) { +hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. */ return true; } @@ -1021,7 +1025,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); /* Currently, all edatas come from pageslabs. 
*/ assert(ps != NULL); - void *unreserve_addr = edata_addr_get(edata); + void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); edata_cache_fast_put(tsdn, &shard->ecf, edata); @@ -1037,7 +1041,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, hpa_shard_t *shard = hpa_from_pai(self); edata_t *edata; - ql_foreach(edata, &list->head, ql_link_active) { + ql_foreach (edata, &list->head, ql_link_active) { hpa_dalloc_prepare_unlocked(tsdn, shard, edata); } @@ -1048,15 +1052,14 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, hpa_dalloc_locked(tsdn, shard, edata); } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); - *deferred_work_generated = - hpa_shard_has_deferred_work(tsdn, shard); + *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +hpa_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { assert(!edata_guarded_get(edata)); /* Just a dalloc_batch of size 1; this lets us share logic. */ edata_list_active_t dalloc_list; @@ -1072,14 +1075,14 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { hpa_shard_t *shard = hpa_from_pai(self); - uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; + uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; malloc_mutex_lock(tsdn, &shard->mtx); hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); if (to_hugify != NULL) { - nstime_t time_hugify_allowed = - hpdata_time_hugify_allowed(to_hugify); + nstime_t time_hugify_allowed = hpdata_time_hugify_allowed( + to_hugify); uint64_t since_hugify_allowed_ms = shard->central->hooks.ms_since(&time_hugify_allowed); /* @@ -1087,8 +1090,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * sleep for the rest. 
*/ if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { - time_ns = shard->opts.hugify_delay_ms - - since_hugify_allowed_ms; + time_ns = shard->opts.hugify_delay_ms + - since_hugify_allowed_ms; time_ns *= 1000 * 1000; } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -1110,8 +1113,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { uint64_t until_purge_ns; - until_purge_ns = shard->opts.min_purge_interval_ms - - since_last_purge_ms; + until_purge_ns = shard->opts.min_purge_interval_ms + - since_last_purge_ms; until_purge_ns *= 1000 * 1000; if (until_purge_ns < time_ns) { @@ -1176,8 +1179,8 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, - bool deferral_allowed) { +hpa_shard_set_deferral_allowed( + tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed) { hpa_do_consistency_checks(shard); malloc_mutex_lock(tsdn, &shard->mtx); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 072d490e..45bebe41 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -3,26 +3,18 @@ #include "jemalloc/internal/hpa_hooks.h" -static void *hpa_hooks_map(size_t size); -static void hpa_hooks_unmap(void *ptr, size_t size); -static void hpa_hooks_purge(void *ptr, size_t size); -static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); -static void hpa_hooks_dehugify(void *ptr, size_t size); -static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); +static void *hpa_hooks_map(size_t size); +static void hpa_hooks_unmap(void *ptr, size_t size); +static void hpa_hooks_purge(void *ptr, size_t size); +static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); +static void hpa_hooks_dehugify(void *ptr, size_t size); +static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); -static bool hpa_hooks_vectorized_purge( - void *vec, size_t 
vlen, size_t nbytes); +static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); -const hpa_hooks_t hpa_hooks_default = { - &hpa_hooks_map, - &hpa_hooks_unmap, - &hpa_hooks_purge, - &hpa_hooks_hugify, - &hpa_hooks_dehugify, - &hpa_hooks_curtime, - &hpa_hooks_ms_since, - &hpa_hooks_vectorized_purge -}; +const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -82,13 +74,12 @@ hpa_hooks_ms_since(nstime_t *past_nstime) { return nstime_ms_since(past_nstime); } - /* Return true if we did not purge all nbytes, or on some error */ static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE - return pages_purge_process_madvise(vec, vlen, nbytes); + return pages_purge_process_madvise(vec, vlen, nbytes); #else - return true; + return true; #endif } diff --git a/src/hpdata.c b/src/hpdata.c index f3e347c4..9d324952 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,8 +17,7 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) -void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; @@ -66,8 +65,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { size_t largest_unchosen_range = 0; while (true) { - bool found = fb_urange_iter(hpdata->active_pages, - HUGEPAGE_PAGES, start, &begin, &len); + bool found = fb_urange_iter( + hpdata->active_pages, HUGEPAGE_PAGES, start, &begin, &len); /* * A precondition to this function is that hpdata must be able * to serve the allocation. 
@@ -97,8 +96,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * We might be about to dirty some memory for the first time; update our * count if so. */ - size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES, - result, npages); + size_t new_dirty = fb_ucount( + hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; @@ -129,8 +128,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { } hpdata_assert_consistent(hpdata); - return (void *)( - (byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); + return ( + void *)((byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void @@ -148,10 +147,10 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); /* We might have just created a new, larger range. */ - size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, - begin) + 1); - size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES, - begin + npages - 1); + size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, begin) + + 1); + size_t new_end = fb_ffs( + hpdata->active_pages, HUGEPAGE_PAGES, begin + npages - 1); size_t new_range_len = new_end - new_begin; if (new_range_len > old_longest_range) { @@ -164,8 +163,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } size_t -hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, - size_t *nranges) { +hpdata_purge_begin( + hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges) { hpdata_assert_consistent(hpdata); /* * See the comment below; we might purge any inactive extent, so it's @@ -212,29 +211,29 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; fb_init(dirty_pages, HUGEPAGE_PAGES); fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES); - fb_bit_and(dirty_pages, 
dirty_pages, hpdata->touched_pages, - HUGEPAGE_PAGES); + fb_bit_and( + dirty_pages, dirty_pages, hpdata->touched_pages, HUGEPAGE_PAGES); fb_init(purge_state->to_purge, HUGEPAGE_PAGES); size_t next_bit = 0; *nranges = 0; while (next_bit < HUGEPAGE_PAGES) { - size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, - next_bit); + size_t next_dirty = fb_ffs( + dirty_pages, HUGEPAGE_PAGES, next_bit); /* Recall that fb_ffs returns nbits if no set bit is found. */ if (next_dirty == HUGEPAGE_PAGES) { break; } - size_t next_active = fb_ffs(hpdata->active_pages, - HUGEPAGE_PAGES, next_dirty); + size_t next_active = fb_ffs( + hpdata->active_pages, HUGEPAGE_PAGES, next_dirty); /* * Don't purge past the end of the dirty extent, into retained * pages. This helps the kernel a tiny bit, but honestly it's * mostly helpful for testing (where we tend to write test cases * that think in terms of the dirty ranges). */ - ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES, - next_active - 1); + ssize_t last_dirty = fb_fls( + dirty_pages, HUGEPAGE_PAGES, next_active - 1); assert(last_dirty >= 0); assert((size_t)last_dirty >= next_dirty); assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES); @@ -249,9 +248,9 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive; purge_state->ndirty_to_purge = ndirty; assert(ndirty <= fb_scount( - purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); - assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, - HUGEPAGE_PAGES)); + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(ndirty + == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(*nranges <= ndirty); assert(ndirty == 0 || *nranges > 0); @@ -281,8 +280,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, return false; } - *r_purge_addr = (void *)( - (byte_t *)hpdata_addr_get(hpdata) + purge_begin * PAGE); + *r_purge_addr = (void *)((byte_t 
*)hpdata_addr_get(hpdata) + + purge_begin * PAGE); *r_purge_size = purge_len * PAGE; purge_state->next_purge_search_begin = purge_begin + purge_len; @@ -299,12 +298,13 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { /* See the comment in reserve. */ assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(purge_state->npurged == fb_scount(purge_state->to_purge, - HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(purge_state->npurged + == fb_scount( + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(purge_state->npurged >= purge_state->ndirty_to_purge); - fb_bit_not(purge_state->to_purge, purge_state->to_purge, - HUGEPAGE_PAGES); + fb_bit_not( + purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge); diff --git a/src/inspect.c b/src/inspect.c index 2575b5c1..116e77a1 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -3,8 +3,8 @@ #include "jemalloc/internal/inspect.h" void -inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, - size_t *nregs, size_t *size) { +inspect_extent_util_stats_get( + tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -57,7 +57,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = edata_binshard_get(edata); - bin_t *bin = arena_get_bin(arena, szind, binshard); + bin_t *bin = arena_get_bin(arena, szind, binshard); malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) { diff --git a/src/jemalloc.c b/src/jemalloc.c index c6621a79..876c49e8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -29,11 +29,11 @@ /* Data. 
*/ /* Runtime configuration options. */ -const char *je_malloc_conf +const char *je_malloc_conf #ifndef _WIN32 JEMALLOC_ATTR(weak) #endif - ; + ; /* * The usual rule is that the closer to runtime you are, the higher priority * your configuration settings are (so the jemalloc config options get lower @@ -51,23 +51,23 @@ const char *je_malloc_conf * We don't actually want this to be widespread, so we'll give it a silly name * and not mention it in headers or documentation. */ -const char *je_malloc_conf_2_conf_harder +const char *je_malloc_conf_2_conf_harder #ifndef _WIN32 JEMALLOC_ATTR(weak) #endif - ; + ; const char *opt_malloc_conf_symlink = NULL; const char *opt_malloc_conf_env_var = NULL; -bool opt_abort = +bool opt_abort = #ifdef JEMALLOC_DEBUG true #else false #endif ; -bool opt_abort_conf = +bool opt_abort_conf = #ifdef JEMALLOC_DEBUG true #else @@ -75,29 +75,29 @@ bool opt_abort_conf = #endif ; /* Intentionally default off, even with debug builds. */ -bool opt_confirm_conf = false; -const char *opt_junk = +bool opt_confirm_conf = false; +const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" #else "false" #endif ; -bool opt_junk_alloc = +bool opt_junk_alloc = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; -bool opt_junk_free = +bool opt_junk_free = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; -bool opt_trust_madvise = +bool opt_trust_madvise = #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS false #else @@ -131,9 +131,9 @@ atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); bool opt_disable_large_size_classes = true; const char *const zero_realloc_mode_names[] = { - "alloc", - "free", - "abort", + "alloc", + "free", + "abort", }; /* @@ -143,27 +143,31 @@ const char *const zero_realloc_mode_names[] = { static const uint8_t junk_alloc_byte = 0xa5; static const uint8_t junk_free_byte = 0x5a; -static void default_junk_alloc(void *ptr, size_t usize) { +static void 
+default_junk_alloc(void *ptr, size_t usize) { memset(ptr, junk_alloc_byte, usize); } -static void default_junk_free(void *ptr, size_t usize) { +static void +default_junk_free(void *ptr, size_t usize) { memset(ptr, junk_free_byte, usize); } -void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; -void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE junk_alloc_callback)( + void *ptr, size_t size) = &default_junk_alloc; +void (*JET_MUTABLE junk_free_callback)( + void *ptr, size_t size) = &default_junk_free; void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; -bool opt_utrace = false; -bool opt_xmalloc = false; -bool opt_experimental_infallible_new = false; -bool opt_experimental_tcache_gc = true; -bool opt_zero = false; -unsigned opt_narenas = 0; -static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +bool opt_utrace = false; +bool opt_xmalloc = false; +bool opt_experimental_infallible_new = false; +bool opt_experimental_tcache_gc = true; +bool opt_zero = false; +unsigned opt_narenas = 0; +static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); -unsigned ncpus; +unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; @@ -175,9 +179,9 @@ size_t opt_calloc_madvise_threshold = static malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ -bool opt_hpa = false; +bool opt_hpa = false; hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; -sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; +sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; /* * Arenas that are used to service external requests. Not all elements of the @@ -190,48 +194,48 @@ sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; * Points to an arena_t. */ JEMALLOC_ALIGNED(CACHELINE) -atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; -static atomic_u_t narenas_total; /* Use narenas_total_*(). 
*/ +atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; +static atomic_u_t narenas_total; /* Use narenas_total_*(). */ /* Below three are read-only after initialization. */ -static arena_t *a0; /* arenas[0]. */ -unsigned narenas_auto; -unsigned manual_arena_base; +static arena_t *a0; /* arenas[0]. */ +unsigned narenas_auto; +unsigned manual_arena_base; malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ -bool malloc_slow = true; +bool malloc_slow = true; /* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { - flag_opt_junk_alloc = (1U), - flag_opt_junk_free = (1U << 1), - flag_opt_zero = (1U << 2), - flag_opt_utrace = (1U << 3), - flag_opt_xmalloc = (1U << 4) + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_zero = (1U << 2), + flag_opt_utrace = (1U << 3), + flag_opt_xmalloc = (1U << 4) }; -static uint8_t malloc_slow_flags; +static uint8_t malloc_slow_flags; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ -# define NO_INITIALIZER ((unsigned long)0) -# define INITIALIZER pthread_self() -# define IS_INITIALIZER (malloc_initializer == pthread_self()) -static pthread_t malloc_initializer = NO_INITIALIZER; +# define NO_INITIALIZER ((unsigned long)0) +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; #else -# define NO_INITIALIZER false -# define INITIALIZER true -# define IS_INITIALIZER malloc_initializer -static bool malloc_initializer = NO_INITIALIZER; +# define NO_INITIALIZER false +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. 
*/ #ifdef _WIN32 -#if _WIN32_WINNT >= 0x0600 -static malloc_mutex_t init_lock = SRWLOCK_INIT; -#else -static malloc_mutex_t init_lock; -static bool init_lock_initialized = false; +# if _WIN32_WINNT >= 0x0600 +static malloc_mutex_t init_lock = SRWLOCK_INIT; +# else +static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI @@ -253,36 +257,38 @@ _init_init_lock(void) { init_lock_initialized = true; } -#ifdef _MSC_VER -# pragma section(".CRT$XCU", read) -JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) -static const void (WINAPI *init_init_lock)(void) = _init_init_lock; -#endif -#endif +# ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") +JEMALLOC_ATTR(used) static const + void(WINAPI *init_init_lock)(void) = _init_init_lock; +# endif +# endif #else -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif typedef struct { - void *p; /* Input pointer (as in realloc(p, s)). */ - size_t s; /* Request size. */ - void *r; /* Result pointer. */ + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ } malloc_utrace_t; #ifdef JEMALLOC_UTRACE -# define UTRACE(a, b, c) do { \ - if (unlikely(opt_utrace)) { \ - int utrace_serrno = errno; \ - malloc_utrace_t ut; \ - ut.p = (a); \ - ut.s = (b); \ - ut.r = (c); \ - UTRACE_CALL(&ut, sizeof(ut)); \ - errno = utrace_serrno; \ - } \ -} while (0) +# define UTRACE(a, b, c) \ + do { \ + if (unlikely(opt_utrace)) { \ + int utrace_serrno = errno; \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + UTRACE_CALL(&ut, sizeof(ut)); \ + errno = utrace_serrno; \ + } \ + } while (0) #else -# define UTRACE(a, b, c) +# define UTRACE(a, b, c) #endif /* Whether encountered any invalid config options. */ @@ -294,8 +300,8 @@ static bool had_conf_error = false; * definition. 
*/ -static bool malloc_init_hard_a0(void); -static bool malloc_init_hard(void); +static bool malloc_init_hard_a0(void); +static bool malloc_init_hard(void); /******************************************************************************/ /* @@ -442,8 +448,10 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (have_background_thread) { if (background_thread_create(tsdn_tsd(tsdn), ind)) { - malloc_printf(": error in background thread " - "creation for arena %u. Abort.\n", ind); + malloc_printf( + ": error in background thread " + "creation for arena %u. Abort.\n", + ind); abort(); } } @@ -479,8 +487,8 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { &arena->binshard_next, 1, ATOMIC_RELAXED); tsd_binshards_t *bins = tsd_binshardsp_get(tsd); for (unsigned i = 0; i < SC_NBINS; i++) { - assert(bin_infos[i].n_shards > 0 && - bin_infos[i].n_shards <= BIN_SHARDS_MAX); + assert(bin_infos[i].n_shards > 0 + && bin_infos[i].n_shards <= BIN_SHARDS_MAX); bins->binshard[i] = shard % bin_infos[i].n_shards; } } @@ -495,8 +503,8 @@ arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); - if (arena_nthreads_get(oldarena, false) == 0 && - !background_thread_enabled()) { + if (arena_nthreads_get(oldarena, false) == 0 + && !background_thread_enabled()) { /* * Purge if the old arena has no associated threads anymore and * no background threads. @@ -537,7 +545,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { if (narenas_auto > 1) { unsigned i, j, choose[2], first_null; - bool is_new_arena[2]; + bool is_new_arena[2]; /* * Determine binding for both non-internal and internal @@ -562,11 +570,14 @@ arena_choose_hard(tsd_t *tsd, bool internal) { * number of threads assigned to it. 
*/ for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get( - tsd_tsdn(tsd), i, false), !!j) < - arena_nthreads_get(arena_get( - tsd_tsdn(tsd), choose[j], false), - !!j)) { + if (arena_nthreads_get( + arena_get( + tsd_tsdn(tsd), i, false), + !!j) + < arena_nthreads_get( + arena_get(tsd_tsdn(tsd), + choose[j], false), + !!j)) { choose[j] = i; } } @@ -585,16 +596,17 @@ arena_choose_hard(tsd_t *tsd, bool internal) { } for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), - choose[j], false), !!j) == 0 || first_null == - narenas_auto) { + if (arena_nthreads_get( + arena_get(tsd_tsdn(tsd), choose[j], false), !!j) + == 0 + || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. */ if (!!j == internal) { - ret = arena_get(tsd_tsdn(tsd), - choose[j], false); + ret = arena_get( + tsd_tsdn(tsd), choose[j], false); } } else { arena_t *arena; @@ -604,8 +616,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) { arena = arena_init_locked(tsd_tsdn(tsd), choose[j], &arena_config_default); if (arena == NULL) { - malloc_mutex_unlock(tsd_tsdn(tsd), - &arenas_lock); + malloc_mutex_unlock( + tsd_tsdn(tsd), &arenas_lock); return NULL; } is_new_arena[j] = true; @@ -657,7 +669,7 @@ arena_cleanup(tsd_t *tsd) { static void stats_print_atexit(void) { if (config_stats) { - tsdn_t *tsdn; + tsdn_t *tsdn; unsigned narenas, i; tsdn = tsdn_fetch(); @@ -675,13 +687,13 @@ stats_print_atexit(void) { tcache_slow_t *tcache_slow; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach(tcache_slow, &arena->tcache_ql, - link) { - tcache_stats_merge(tsdn, - tcache_slow->tcache, arena); + ql_foreach ( + tcache_slow, &arena->tcache_ql, link) { + tcache_stats_merge( + tsdn, tcache_slow->tcache, arena); } - malloc_mutex_unlock(tsdn, - &arena->tcache_ql_mtx); + malloc_mutex_unlock( + tsdn, &arena->tcache_ql_mtx); } } } @@ -726,16 +738,16 @@ jemalloc_getenv(const char *name) { #ifdef JEMALLOC_FORCE_GETENV return 
getenv(name); #else -# ifdef JEMALLOC_HAVE_SECURE_GETENV +# ifdef JEMALLOC_HAVE_SECURE_GETENV return secure_getenv(name); -# else -# ifdef JEMALLOC_HAVE_ISSETUGID +# else +# ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } -# endif +# endif return getenv(name); -# endif +# endif #endif } @@ -759,16 +771,16 @@ malloc_ncpus(void) { * is available, to avoid using more arenas than necessary. */ { -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; -# else +# else cpu_set_t set; -# endif -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# endif +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); -# else +# else pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif +# endif result = CPU_COUNT(&set); } #else @@ -785,8 +797,7 @@ malloc_ncpus(void) { * Since otherwise tricky things is possible with percpu arenas in use. */ static bool -malloc_cpu_count_is_deterministic(void) -{ +malloc_cpu_count_is_deterministic(void) { #ifdef _WIN32 return true; #else @@ -795,22 +806,22 @@ malloc_cpu_count_is_deterministic(void) if (cpu_onln != cpu_conf) { return false; } -# if defined(CPU_COUNT) -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(CPU_COUNT) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; -# else +# else cpu_set_t set; -# endif /* __FreeBSD__ */ -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# endif /* __FreeBSD__ */ +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); -# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ +# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ +# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ long cpu_affinity = CPU_COUNT(&set); if (cpu_affinity != cpu_conf) { return false; } -# endif /* CPU_COUNT */ +# endif /* CPU_COUNT */ return true; #endif } @@ 
-822,10 +833,13 @@ init_opt_stats_opts(const char *v, size_t vlen, char *dest) { for (size_t i = 0; i < vlen; i++) { switch (v[i]) { -#define OPTION(o, v, d, s) case o: break; +#define OPTION(o, v, d, s) \ + case o: \ + break; STATS_PRINT_OPTIONS #undef OPTION - default: continue; + default: + continue; } if (strchr(dest, v[i]) != NULL) { @@ -851,25 +865,75 @@ malloc_conf_format_error(const char *msg, const char *begin, const char *end) { static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { - bool accept; + bool accept; const char *opts = *opts_p; *k_p = opts; for (accept = false; !accept;) { switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '0': + case '1': + case '2': + case '3': 
+ case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case '_': opts++; break; @@ -882,8 +946,8 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, case '\0': if (opts != *opts_p) { malloc_conf_format_error( - "Conf string ends with key", - *opts_p, opts - 1); + "Conf string ends with key", *opts_p, + opts - 1); had_conf_error = true; } return true; @@ -908,8 +972,8 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, */ if (*opts == '\0') { malloc_conf_format_error( - "Conf string ends with comma", - *opts_p, opts - 1); + "Conf string ends with comma", *opts_p, + opts - 1); had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; @@ -932,16 +996,17 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, static void malloc_abort_invalid_conf(void) { assert(opt_abort_conf); - malloc_printf(": Abort (abort_conf:true) on invalid conf " + malloc_printf( + ": Abort (abort_conf:true) on invalid conf " "value (see above).\n"); invalid_conf_abort(); } static void -malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, - size_t vlen) { - malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, - (int)vlen, v); +malloc_conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { + malloc_printf( + ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); /* If abort_conf is set, error out after processing all options. 
*/ const char *experimental = "experimental_"; if (strncmp(k, experimental, strlen(experimental)) == 0) { @@ -1002,48 +1067,50 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { break; #else ssize_t linklen = 0; -# ifndef _WIN32 - int saved_errno = errno; +# ifndef _WIN32 + int saved_errno = errno; const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else +# ifdef JEMALLOC_PREFIX + "/etc/" JEMALLOC_PREFIX "malloc.conf" +# else "/etc/malloc.conf" -# endif +# endif ; /* * Try to use the contents of the "/etc/malloc.conf" symbolic * link's name. */ -# ifndef JEMALLOC_READLINKAT +# ifndef JEMALLOC_READLINKAT linklen = readlink(linkname, readlink_buf, PATH_MAX); -# else - linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); -# endif +# else + linklen = readlinkat( + AT_FDCWD, linkname, readlink_buf, PATH_MAX); +# endif if (linklen == -1) { /* No configuration specified. */ linklen = 0; /* Restore errno. */ set_errno(saved_errno); } -# endif +# endif readlink_buf[linklen] = '\0'; ret = readlink_buf; break; #endif - } case 3: { + } + case 3: { #ifndef JEMALLOC_CONFIG_ENV ret = NULL; break; #else const char *envname = -# ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -# else - "MALLOC_CONF" -# endif - ; +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX "MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; if ((ret = jemalloc_getenv(envname)) != NULL) { opt_malloc_conf_env_var = ret; @@ -1053,10 +1120,12 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { } break; #endif - } case 4: { + } + case 4: { ret = je_malloc_conf_2_conf_harder; break; - } default: + } + default: not_reached(); ret = NULL; } @@ -1072,15 +1141,16 @@ validate_hpa_settings(void) { had_conf_error = true; malloc_printf( ": huge page size (%zu) greater than expected." 
- "May not be supported or behave as expected.", HUGEPAGE); + "May not be supported or behave as expected.", + HUGEPAGE); } #ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE if (opt_hpa_opts.hugify_sync) { - had_conf_error = true; - malloc_printf( - ": hpa_hugify_sync config option is enabled, " - "but MADV_COLLAPSE support was not detected at build " - "time."); + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); } #endif } @@ -1090,17 +1160,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], char readlink_buf[PATH_MAX + 1]) { static const char *opts_explain[MALLOC_CONF_NSOURCES] = { - "string specified via --with-malloc-conf", - "string pointed to by the global variable malloc_conf", - "\"name\" of the file referenced by the symbolic link named " - "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF", - "string pointed to by the global variable " - "malloc_conf_2_conf_harder", + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", }; - unsigned i; + unsigned i; const char *opts, *k, *v; - size_t klen, vlen; + size_t klen, vlen; for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { /* Get runtime configuration. */ @@ -1110,129 +1180,116 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opts = opts_cache[i]; if (!initial_call && opt_confirm_conf) { malloc_printf( - ": malloc_conf #%u (%s): \"%s\"\n", - i + 1, opts_explain[i], opts != NULL ? opts : ""); + ": malloc_conf #%u (%s): \"%s\"\n", i + 1, + opts_explain[i], opts != NULL ? 
opts : ""); } if (opts == NULL) { continue; } - while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, - &vlen)) { - -#define CONF_ERROR(msg, k, klen, v, vlen) \ - if (!initial_call) { \ - malloc_conf_error( \ - msg, k, klen, v, vlen); \ - cur_opt_valid = false; \ - } -#define CONF_CONTINUE { \ - if (!initial_call && opt_confirm_conf \ - && cur_opt_valid) { \ - malloc_printf(": -- " \ - "Set conf value: %.*s:%.*s" \ - "\n", (int)klen, k, \ - (int)vlen, v); \ - } \ - continue; \ - } -#define CONF_MATCH(n) \ - (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) -#define CONF_MATCH_VALUE(n) \ - (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) -#define CONF_HANDLE_BOOL(o, n) \ - if (CONF_MATCH(n)) { \ - if (CONF_MATCH_VALUE("true")) { \ - o = true; \ - } else if (CONF_MATCH_VALUE("false")) { \ - o = false; \ - } else { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } \ - CONF_CONTINUE; \ - } - /* + while (*opts != '\0' + && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + malloc_conf_error(msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE \ + { \ + if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ + malloc_printf( \ + ": -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", \ + (int)klen, k, (int)vlen, v); \ + } \ + continue; \ + } +#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) +#define CONF_HANDLE_BOOL(o, n) \ + if (CONF_MATCH(n)) { \ + if (CONF_MATCH_VALUE("true")) { \ + o = true; \ + } else if (CONF_MATCH_VALUE("false")) { \ + o = false; \ + } else { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } \ + CONF_CONTINUE; \ + } + /* * One of the CONF_MIN macros below expands, in one of the use points, * to "unsigned integer < 0", which is always false, triggering the * GCC -Wtype-limits warning, which we disable here and 
re-enable below. */ - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -#define CONF_DONT_CHECK_MIN(um, min) false -#define CONF_CHECK_MIN(um, min) ((um) < (min)) -#define CONF_DONT_CHECK_MAX(um, max) false -#define CONF_CHECK_MAX(um, max) ((um) > (max)) +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) -#define CONF_VALUE_READ(max_t, result) \ - char *end; \ - set_errno(0); \ - result = (max_t)malloc_strtoumax(v, &end, 0); -#define CONF_VALUE_READ_FAIL() \ - (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) -#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ - if (CONF_MATCH(n)) { \ - max_t mv; \ - CONF_VALUE_READ(max_t, mv) \ - if (CONF_VALUE_READ_FAIL()) { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } else if (clip) { \ - if (check_min(mv, (t)(min))) { \ - o = (t)(min); \ - } else if ( \ - check_max(mv, (t)(max))) { \ - o = (t)(max); \ - } else { \ - o = (t)mv; \ - } \ - } else { \ - if (check_min(mv, (t)(min)) || \ - check_max(mv, (t)(max))) { \ - CONF_ERROR( \ - "Out-of-range " \ - "conf value", \ - k, klen, v, vlen); \ - } else { \ - o = (t)mv; \ - } \ - } \ - CONF_CONTINUE; \ - } -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, \ - check_max, clip) -#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)\ - CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, \ - check_max, clip) +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ + if 
(CONF_MATCH(n)) { \ + max_t mv; \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } else if (clip) { \ + if (check_min(mv, (t)(min))) { \ + o = (t)(min); \ + } else if (check_max(mv, (t)(max))) { \ + o = (t)(max); \ + } else { \ + o = (t)mv; \ + } \ + } else { \ + if (check_min(mv, (t)(min)) \ + || check_max(mv, (t)(max))) { \ + CONF_ERROR( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else { \ + o = (t)mv; \ + } \ + } \ + CONF_CONTINUE; \ + } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ - clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\ - CONF_HANDLE_T_U(uint64_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ - CONF_CHECK_MIN, CONF_CHECK_MAX, false) -#define CONF_HANDLE_CHAR_P(o, n, d) \ - if (CONF_MATCH(n)) { \ - size_t cpylen = (vlen <= \ - sizeof(o)-1) ? 
vlen : \ - sizeof(o)-1; \ - strncpy(o, v, cpylen); \ - o[cpylen] = '\0'; \ - CONF_CONTINUE; \ - } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_SIGNED( \ + int64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + CONF_HANDLE_T_SIGNED( \ + ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (CONF_MATCH(n)) { \ + size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ + : sizeof(o) - 1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + CONF_CONTINUE; \ + } bool cur_opt_valid = true; @@ -1245,27 +1302,29 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") - CONF_HANDLE_BOOL(opt_huge_arena_pac_thp, "huge_arena_pac_thp") + CONF_HANDLE_BOOL( + opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { - int m; + int m; bool match = false; for (m = 0; m < metadata_thp_mode_limit; m++) { if (strncmp(metadata_thp_mode_names[m], - v, vlen) == 0) { + v, vlen) + == 0) { opt_metadata_thp = m; match = true; break; } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { - int m; + int m; bool match = false; for (m = 0; m < 
dss_prec_limit; m++) { if (strncmp(dss_prec_names[m], v, vlen) @@ -1283,8 +1342,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } @@ -1301,31 +1360,32 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (CONF_MATCH("narenas_ratio")) { char *end; - bool err = fxp_parse(&opt_narenas_ratio, v, - &end); + bool err = fxp_parse( + &opt_narenas_ratio, v, &end); if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; - size_t vlen_left = vlen; + size_t vlen_left = vlen; do { size_t size_start; size_t size_end; size_t nshards; - bool err = multi_setting_parse_next( - &bin_shards_segment_cur, &vlen_left, - &size_start, &size_end, &nshards); - if (err || bin_update_shard_size( - bin_shard_sizes, size_start, - size_end, nshards)) { + bool err = multi_setting_parse_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err + || bin_update_shard_size( + bin_shard_sizes, size_start, + size_end, nshards)) { CONF_ERROR( "Invalid settings for " - "bin_shards", k, klen, v, - vlen); + "bin_shards", + k, klen, v, vlen); break; } } while (vlen_left > 0); @@ -1337,12 +1397,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ true) CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, "remote_free_max_batch", 0, - BIN_REMOTE_FREE_ELEMS_MAX, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, - "remote_free_max", 0, - BIN_REMOTE_FREE_ELEMS_MAX, + "remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX, 
CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) @@ -1350,9 +1409,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool err = tcache_bin_info_default_init( v, vlen); if (err) { - CONF_ERROR("Invalid settings for " - "tcache_ncached_max", k, klen, v, - vlen); + CONF_ERROR( + "Invalid settings for " + "tcache_ncached_max", + k, klen, v, vlen); } CONF_CONTINUE; } @@ -1360,13 +1420,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false); CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, - "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < - QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : - SSIZE_MAX); + "dirty_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, - "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < - QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : - SSIZE_MAX); + "muzzy_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? 
NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, "process_madvise_max_batch", 0, PROCESS_MADVISE_MAX_BATCH_LIMIT, @@ -1374,16 +1436,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ true) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_opts(v, vlen, - opt_stats_print_opts); + init_opt_stats_opts( + v, vlen, opt_stats_print_opts); CONF_CONTINUE; } CONF_HANDLE_INT64_T(opt_stats_interval, - "stats_interval", -1, INT64_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false) if (CONF_MATCH("stats_interval_opts")) { - init_opt_stats_opts(v, vlen, - opt_stats_interval_opts); + init_opt_stats_opts( + v, vlen, opt_stats_interval_opts); CONF_CONTINUE; } if (config_fill) { @@ -1405,8 +1467,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_junk_alloc = false; opt_junk_free = true; } else { - CONF_ERROR( - "Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } CONF_CONTINUE; @@ -1428,15 +1489,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_experimental_tcache_gc, "experimental_tcache_gc") CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", - 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, + TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) if (CONF_MATCH("lg_tcache_max")) { size_t m; CONF_VALUE_READ(size_t, m) if (CONF_VALUE_READ_FAIL()) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { /* clip if necessary */ if (m > TCACHE_LG_MAXCLASS_LIMIT) { @@ -1454,14 +1515,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], 
"lg_tcache_nslots_mul", -16, 16) /* Ditto with values past 2048. */ CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, - "tcache_nslots_small_min", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, - "tcache_nslots_small_max", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, - "tcache_nslots_large", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, @@ -1471,18 +1532,19 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, - "lg_tcache_flush_small_div", 1, 16, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, - "lg_tcache_flush_large_div", 1, 16, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, "debug_double_free_max_scan", 0, UINT_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ false) + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains @@ -1502,10 +1564,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned 
bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; - for (int m = percpu_arena_mode_names_base; m < - percpu_arena_mode_names_limit; m++) { + for (int m = percpu_arena_mode_names_base; + m < percpu_arena_mode_names_limit; m++) { if (strncmp(percpu_arena_mode_names[m], - v, vlen) == 0) { + v, vlen) + == 0) { if (!have_percpu_arena) { CONF_ERROR( "No getcpu support", @@ -1517,18 +1580,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } - CONF_HANDLE_BOOL(opt_background_thread, - "background_thread"); + CONF_HANDLE_BOOL( + opt_background_thread, "background_thread"); CONF_HANDLE_SIZE_T(opt_max_background_threads, - "max_background_threads", 1, - opt_max_background_threads, - CONF_CHECK_MIN, CONF_CHECK_MAX, - true); + "max_background_threads", 1, + opt_max_background_threads, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, @@ -1544,12 +1606,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (CONF_MATCH("hpa_hugification_threshold_ratio")) { fxp_t ratio; char *end; - bool err = fxp_parse(&ratio, v, - &end); + bool err = fxp_parse(&ratio, v, &end); if (err || (size_t)(end - v) != vlen || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { opt_hpa_opts.hugification_threshold = fxp_mul_frac(HUGEPAGE, ratio); @@ -1557,16 +1618,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_UINT64_T( - opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", - 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - false); + 
CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, + "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); CONF_HANDLE_BOOL( opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); - CONF_HANDLE_UINT64_T( - opt_hpa_opts.min_purge_interval_ms, + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); @@ -1581,11 +1640,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } fxp_t ratio; char *end; - bool err = fxp_parse(&ratio, v, - &end); + bool err = fxp_parse(&ratio, v, &end); if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { opt_hpa_opts.dirty_mult = ratio; } @@ -1596,8 +1654,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, USIZE_GROW_SLOW_THRESHOLD, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); + "hpa_sec_max_alloc", PAGE, + USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1613,23 +1672,23 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], sc_data_init(sc_data); CONF_CONTINUE; } - bool err; + bool err; const char *slab_size_segment_cur = v; - size_t vlen_left = vlen; + size_t vlen_left = vlen; do { size_t slab_start; size_t slab_end; size_t pgs; err = multi_setting_parse_next( - &slab_size_segment_cur, - &vlen_left, &slab_start, &slab_end, - &pgs); + &slab_size_segment_cur, &vlen_left, + &slab_start, &slab_end, &pgs); if (!err) { sc_data_update_slab_size( sc_data, slab_start, slab_end, (int)pgs); } else { - CONF_ERROR("Invalid settings " + CONF_ERROR( + "Invalid settings " 
"for slab_sizes", k, klen, v, vlen); } @@ -1638,22 +1697,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") - CONF_HANDLE_CHAR_P(opt_prof_prefix, - "prof_prefix", "jeprof") + CONF_HANDLE_CHAR_P( + opt_prof_prefix, "prof_prefix", "jeprof") CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - true) - CONF_HANDLE_SIZE_T(opt_experimental_lg_prof_threshold, - "experimental_lg_prof_threshold", 0, (sizeof(uint64_t) << 3) - - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - true) + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T( + opt_experimental_lg_prof_threshold, + "experimental_lg_prof_threshold", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") - CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", - 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, + "prof_bt_max", 1, PROF_BT_MAX_LIMIT, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, @@ -1661,10 +1722,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - CONF_HANDLE_BOOL(opt_prof_leak_error, - "prof_leak_error") + CONF_HANDLE_BOOL( + opt_prof_leak_error, "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") - CONF_HANDLE_BOOL(opt_prof_pid_namespace, "prof_pid_namespace") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, + "prof_pid_namespace") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, 
"prof_recent_alloc_max", -1, SSIZE_MAX) CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") @@ -1703,9 +1765,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (config_log) { if (CONF_MATCH("log")) { - size_t cpylen = ( - vlen <= sizeof(log_var_names) ? - vlen : sizeof(log_var_names) - 1); + size_t cpylen = (vlen + <= sizeof(log_var_names) + ? vlen + : sizeof(log_var_names) - 1); strncpy(log_var_names, v, cpylen); log_var_names[cpylen] = '\0'; CONF_CONTINUE; @@ -1714,12 +1777,13 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (CONF_MATCH("thp")) { bool match = false; for (int m = 0; m < thp_mode_names_limit; m++) { - if (strncmp(thp_mode_names[m],v, vlen) + if (strncmp(thp_mode_names[m], v, vlen) == 0) { - if (!have_madvise_huge && !have_memcntl) { + if (!have_madvise_huge + && !have_memcntl) { CONF_ERROR( - "No THP support", - k, klen, v, vlen); + "No THP support", k, + klen, v, vlen); } opt_thp = m; match = true; @@ -1727,34 +1791,34 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } if (CONF_MATCH("zero_realloc")) { if (CONF_MATCH_VALUE("alloc")) { - opt_zero_realloc_action - = zero_realloc_action_alloc; + opt_zero_realloc_action = + zero_realloc_action_alloc; } else if (CONF_MATCH_VALUE("free")) { - opt_zero_realloc_action - = zero_realloc_action_free; + opt_zero_realloc_action = + zero_realloc_action_free; } else if (CONF_MATCH_VALUE("abort")) { - opt_zero_realloc_action - = zero_realloc_action_abort; + opt_zero_realloc_action = + zero_realloc_action_abort; } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } - if (config_uaf_detection && - CONF_MATCH("lg_san_uaf_align")) { + if (config_uaf_detection + && 
CONF_MATCH("lg_san_uaf_align")) { ssize_t a; CONF_VALUE_READ(ssize_t, a) if (CONF_VALUE_READ_FAIL() || a < -1) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } if (a == -1) { opt_lg_san_uaf_align = -1; @@ -1807,8 +1871,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P - /* Re-enable diagnostic "-Wtype-limits" */ - JEMALLOC_DIAGNOSTIC_POP + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP } validate_hpa_settings(); if (opt_abort_conf && had_conf_error) { @@ -1821,7 +1885,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static bool malloc_conf_init_check_deps(void) { if (opt_prof_leak_error && !opt_prof_final) { - malloc_printf(": prof_leak_error is set w/o " + malloc_printf( + ": prof_leak_error is set w/o " "prof_final.\n"); return true; } @@ -1836,13 +1901,13 @@ malloc_conf_init_check_deps(void) { static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], char readlink_buf[PATH_MAX + 1]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, - NULL}; + const char *opts_cache[MALLOC_CONF_NSOURCES] = { + NULL, NULL, NULL, NULL, NULL}; /* The first call only set the confirm_conf option and opts_cache */ malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); - malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, - NULL); + malloc_conf_init_helper( + sc_data, bin_shard_sizes, false, opts_cache, NULL); if (malloc_conf_init_check_deps()) { /* check_deps does warning msg only; abort below if needed. 
*/ if (opt_abort_conf) { @@ -1855,8 +1920,9 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static bool malloc_init_hard_needed(void) { - if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == - malloc_init_recursible)) { + if (malloc_initialized() + || (IS_INITIALIZER + && malloc_init_state == malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing @@ -1946,7 +2012,8 @@ malloc_init_hard_a0_locked(void) { prof_boot1(); } if (opt_hpa && !hpa_supported()) { - malloc_printf(": HPA not supported in the current " + malloc_printf( + ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { @@ -1962,7 +2029,7 @@ malloc_init_hard_a0_locked(void) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } hook_boot(); @@ -1984,7 +2051,8 @@ malloc_init_hard_a0_locked(void) { a0 = arena_get(TSDN_NULL, 0, false); if (opt_hpa && !hpa_supported()) { - malloc_printf(": HPA not supported in the current " + malloc_printf( + ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { @@ -2035,7 +2103,8 @@ malloc_init_hard_recursible(void) { */ if (opt_narenas == 0) { opt_percpu_arena = percpu_arena_disabled; - malloc_write(": Number of CPUs " + malloc_write( + ": Number of CPUs " "detected is not deterministic. Per-CPU " "arena disabled.\n"); if (opt_abort_conf) { @@ -2049,11 +2118,12 @@ malloc_init_hard_recursible(void) { } #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ - && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ - !defined(__native_client__)) + && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \ + && !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. 
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, - jemalloc_postfork_child) != 0) { + jemalloc_postfork_child) + != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) { abort(); @@ -2077,8 +2147,8 @@ malloc_narenas_default(void) { * default. */ if (ncpus > 1) { - fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); - fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); + fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); + fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); uint32_t int_goal = fxp_round_nearest(goal); if (int_goal == 0) { return 1; @@ -2108,28 +2178,35 @@ malloc_init_narenas(tsdn_t *tsdn) { if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; - malloc_printf(": perCPU arena getcpu() not " - "available. Setting narenas to %u.\n", opt_narenas ? - opt_narenas : malloc_narenas_default()); + malloc_printf( + ": perCPU arena getcpu() not " + "available. Setting narenas to %u.\n", + opt_narenas ? opt_narenas + : malloc_narenas_default()); if (opt_abort) { abort(); } } else { if (ncpus >= MALLOCX_ARENA_LIMIT) { - malloc_printf(": narenas w/ percpu" - "arena beyond limit (%d)\n", ncpus); + malloc_printf( + ": narenas w/ percpu" + "arena beyond limit (%d)\n", + ncpus); if (opt_abort) { abort(); } return true; } /* NB: opt_percpu_arena isn't fully initialized yet. */ - if (percpu_arena_as_initialized(opt_percpu_arena) == - per_phycpu_arena && ncpus % 2 != 0) { - malloc_printf(": invalid " + if (percpu_arena_as_initialized(opt_percpu_arena) + == per_phycpu_arena + && ncpus % 2 != 0) { + malloc_printf( + ": invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " - "threading?).\n", ncpus); + "threading?).\n", + ncpus); if (opt_abort) abort(); } @@ -2217,24 +2294,23 @@ malloc_init_hard(void) { * than LARGE_MINCLASS. It could only happen if some constants * are configured miserably wrong. 
*/ - assert(SC_LG_TINY_MAXCLASS <= - (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + assert(SC_LG_TINY_MAXCLASS <= (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif malloc_mutex_lock(TSDN_NULL, &init_lock); -#define UNLOCK_RETURN(tsdn, ret, reentrancy) \ - malloc_init_hard_cleanup(tsdn, reentrancy); \ +#define UNLOCK_RETURN(tsdn, ret, reentrancy) \ + malloc_init_hard_cleanup(tsdn, reentrancy); \ return ret; if (!malloc_init_hard_needed()) { UNLOCK_RETURN(TSDN_NULL, false, false) } - if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked()) { + if (malloc_init_state != malloc_init_a0_initialized + && malloc_init_hard_a0_locked()) { UNLOCK_RETURN(TSDN_NULL, true, false) } @@ -2262,11 +2338,11 @@ malloc_init_hard(void) { * background_thread_enabled wasn't initialized yet, but we * need it to set correct value for deferral_allowed. */ - arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); + arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } } @@ -2282,8 +2358,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - witness_assert_lockless(witness_tsd_tsdn( - tsd_witness_tsdp_get_unsafe(tsd))); + witness_assert_lockless( + witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. 
*/ tsd = tsd_fetch(); @@ -2378,12 +2454,12 @@ static_opts_init(static_opts_t *static_opts) { typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { - void **result; - size_t usize; - size_t num_items; - size_t item_size; - size_t alignment; - bool zero; + void **result; + size_t usize; + size_t num_items; + size_t item_size; + size_t alignment; + bool zero; unsigned tcache_ind; unsigned arena_ind; }; @@ -2414,8 +2490,9 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_large_size_classes_disabled()? sz_s2u(size): - sz_index2size(*ind); + *usize = sz_large_size_classes_disabled() + ? sz_s2u(size) + : sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2465,8 +2542,8 @@ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind, bool slab) { /* Fill in the tcache. */ - tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, - sopts->slow, /* is_alloc */ true); + tcache_t *tcache = tcache_get_from_ind( + tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); /* Fill in the arena. */ arena_t *arena; @@ -2496,7 +2573,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, */ if (sz_can_use_slab(usize)) { assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0); - size_t bumped_usize = sz_sa2u(usize, dopts->alignment); + size_t bumped_usize = sz_sa2u(usize, dopts->alignment); szind_t bumped_ind = sz_size2index(bumped_usize); dopts->tcache_ind = TCACHE_IND_NONE; ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, @@ -2519,8 +2596,8 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * *size to the product either way. 
*/ JEMALLOC_ALWAYS_INLINE bool -compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, - size_t *size) { +compute_size_with_overflow( + bool may_overflow, dynamic_opts_t *dopts, size_t *size) { /* * This function is just num_items * item_size, except that we may have * to check for overflow. @@ -2576,26 +2653,26 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { int8_t reentrancy_level; /* Compute the amount of memory the user wants. */ - if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, - &size))) { + if (unlikely(compute_size_with_overflow( + sopts->may_overflow, dopts, &size))) { goto label_oom; } if (unlikely(dopts->alignment < sopts->min_alignment - || (dopts->alignment & (dopts->alignment - 1)) != 0)) { + || (dopts->alignment & (dopts->alignment - 1)) != 0)) { goto label_invalid_alignment; } /* This is the beginning of the "core" algorithm. */ dopts->zero = zero_get(dopts->zero, sopts->slow); if (aligned_usize_get(size, dopts->alignment, &usize, &ind, - sopts->bump_empty_aligned_alloc)) { + sopts->bump_empty_aligned_alloc)) { goto label_oom; } dopts->usize = usize; /* Validate the user input. */ if (sopts->assert_nonempty_alloc) { - assert (size != 0); + assert(size != 0); } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -2610,8 +2687,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * We should never specify particular arenas or tcaches from * within our internal allocations. */ - assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC || - dopts->tcache_ind == TCACHE_IND_NONE); + assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC + || dopts->tcache_ind == TCACHE_IND_NONE); assert(dopts->arena_ind == ARENA_IND_AUTOMATIC); dopts->tcache_ind = TCACHE_IND_NONE; /* We know that arena 0 has already been initialized. 
*/ @@ -2628,15 +2705,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); bool sample_event = te_prof_sample_event_lookahead(tsd, usize); - prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, - sample_event); + prof_tctx_t *tctx = prof_alloc_prep( + tsd, prof_active, sample_event); emap_alloc_ctx_t alloc_ctx; if (likely(tctx == PROF_TCTX_SENTINEL)) { alloc_ctx.slab = sz_can_use_slab(usize); - allocation = imalloc_no_sample( - sopts, dopts, tsd, usize, usize, ind, - alloc_ctx.slab); + allocation = imalloc_no_sample(sopts, dopts, tsd, usize, + usize, ind, alloc_ctx.slab); } else if (tctx != NULL) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); @@ -2780,8 +2856,8 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { JEMALLOC_NOINLINE void * malloc_default(size_t size) { - void *ret; - static_opts_t sopts; + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; /* @@ -2819,13 +2895,12 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. 
*/ -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_malloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { LOG("core.malloc.entry", "size: %zu", size); - void * ret = imalloc_fastpath(size, &malloc_default); + void *ret = imalloc_fastpath(size, &malloc_default); LOG("core.malloc.exit", "result: %p", ret); return ret; @@ -2833,13 +2908,15 @@ je_malloc(size_t size) { JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) -je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret; - static_opts_t sopts; + je_posix_memalign(void **memptr, size_t alignment, size_t size) { + int ret; + static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " - "size: %zu", memptr, alignment, size); + LOG("core.posix_memalign.entry", + "mem ptr: %p, alignment: %zu, " + "size: %zu", + memptr, alignment, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2858,10 +2935,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); if (sopts.slow) { - uintptr_t args[3] = {(uintptr_t)memptr, (uintptr_t)alignment, - (uintptr_t)size}; - hook_invoke_alloc(hook_alloc_posix_memalign, *memptr, - (uintptr_t)ret, args); + uintptr_t args[3] = { + (uintptr_t)memptr, (uintptr_t)alignment, (uintptr_t)size}; + hook_invoke_alloc( + hook_alloc_posix_memalign, *memptr, (uintptr_t)ret, args); } LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, @@ -2870,13 +2947,13 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) -je_aligned_alloc(size_t alignment, size_t size) { +JEMALLOC_EXPORT + 
JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) + je_aligned_alloc(size_t alignment, size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", @@ -2902,8 +2979,8 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {(uintptr_t)alignment, (uintptr_t)size}; - hook_invoke_alloc(hook_alloc_aligned_alloc, ret, - (uintptr_t)ret, args); + hook_invoke_alloc( + hook_alloc_aligned_alloc, ret, (uintptr_t)ret, args); } LOG("core.aligned_alloc.exit", "result: %p", ret); @@ -2911,12 +2988,12 @@ je_aligned_alloc(size_t alignment, size_t size) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) -je_calloc(size_t num, size_t size) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) + je_calloc(size_t num, size_t size) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.calloc.entry", "num: %zu, size: %zu", num, size); @@ -2959,8 +3036,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); @@ -2969,14 +3046,12 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } if (likely(!slow_path)) { - idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, - false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { if (config_fill && slow_path && 
opt_junk_free) { junk_free_callback(ptr, usize); } - idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, - true); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } thread_dalloc_event(tsd, usize); } @@ -2995,32 +3070,32 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - szind_t szind = sz_size2index(usize); + szind_t szind = sz_size2index(usize); if (!config_prof) { - emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), - usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - emap_alloc_ctx_init(&alloc_ctx, szind, - (szind < SC_NBINS), usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } else if (opt_prof) { /* * Small sampled allocs promoted can still get correct * usize here. Check comments in edata_usize_get. */ - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr, &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ - size_t true_size = - emap_alloc_ctx_usize_get(&alloc_ctx); - if (unlikely(alloc_ctx.szind != - sz_size2index(usize))) { + size_t true_size = emap_alloc_ctx_usize_get( + &alloc_ctx); + if (unlikely(alloc_ctx.szind + != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, /* true_size */ true_size, @@ -3028,8 +3103,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { } } } else { - emap_alloc_ctx_init(&alloc_ctx, szind, - (szind < SC_NBINS), usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3047,14 +3122,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { prof_free(tsd, ptr, usize, &alloc_ctx); } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, - false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, false); } else { if (config_fill && slow_path && opt_junk_free) { junk_free_callback(ptr, usize); } - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, - true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, true); } thread_dalloc_event(tsd, usize); } @@ -3125,12 +3198,11 @@ je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_memalign(size_t alignment, size_t size) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, @@ -3155,8 +3227,8 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {alignment, size}; - 
hook_invoke_alloc(hook_alloc_memalign, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_memalign, ret, (uintptr_t)ret, args); } LOG("core.memalign.exit", "result: %p", ret); @@ -3165,13 +3237,12 @@ je_memalign(size_t alignment, size_t size) { #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_valloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.valloc.entry", "size: %zu\n", size); @@ -3203,13 +3274,12 @@ je_valloc(size_t size) { #endif #ifdef JEMALLOC_OVERRIDE_PVALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_pvalloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.pvalloc.entry", "size: %zu\n", size); @@ -3236,8 +3306,8 @@ je_pvalloc(size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {size}; - hook_invoke_alloc(hook_alloc_pvalloc, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_pvalloc, ret, (uintptr_t)ret, args); } LOG("core.pvalloc.exit", "result: %p\n", ret); @@ -3255,59 +3325,59 @@ je_pvalloc(size_t size) { * passed an extra argument for the caller return address, which will be * ignored. 
*/ -#include // defines __GLIBC__ if we are compiling against glibc +# include // defines __GLIBC__ if we are compiling against glibc JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; -# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK -JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = - je_memalign; -# endif +# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK +JEMALLOC_EXPORT void *(*__memalign_hook)( + size_t alignment, size_t size) = je_memalign; +# endif -# ifdef __GLIBC__ +# ifdef __GLIBC__ /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the * link. */ -# define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +# define ALIAS(je_fn) __attribute__((alias(#je_fn), used)) /* To force macro expansion of je_ prefix before stringification. 
*/ -# define PREALIAS(je_fn) ALIAS(je_fn) -# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC +# define PREALIAS(je_fn) ALIAS(je_fn) +# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE -void __libc_free(void* ptr) PREALIAS(je_free); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED -void __libc_free_sized(void* ptr, size_t size) PREALIAS(je_free_sized); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED -void __libc_free_aligned_sized( - void* ptr, size_t alignment, size_t size) PREALIAS(je_free_aligned_sized); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE +void __libc_free(void *ptr) PREALIAS(je_free); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +void __libc_free_sized(void *ptr, size_t size) PREALIAS(je_free_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED +void __libc_free_aligned_sized(void *ptr, size_t alignment, size_t size) + PREALIAS(je_free_aligned_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC void *__libc_malloc(size_t size) PREALIAS(je_malloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC -void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC +void *__libc_realloc(void *ptr, size_t size) PREALIAS(je_realloc); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC void *__libc_valloc(size_t size) PREALIAS(je_valloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC void *__libc_pvalloc(size_t size) PREALIAS(je_pvalloc); -# endif -# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN -int __posix_memalign(void** r, size_t a, size_t s) 
PREALIAS(je_posix_memalign); -# endif -# undef PREALIAS -# undef ALIAS -# endif +# endif +# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN +int __posix_memalign(void **r, size_t a, size_t s) PREALIAS(je_posix_memalign); +# endif +# undef PREALIAS +# undef ALIAS +# endif #endif /* @@ -3340,23 +3410,23 @@ mallocx_arena_get(int flags) { #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -#define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y -#define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ - JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) +# define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x##y +# define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ + JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) typedef struct { - void *ptr; + void *ptr; size_t size; } smallocx_return_t; -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -smallocx_return_t JEMALLOC_NOTHROW -/* +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t + JEMALLOC_NOTHROW + /* * The attribute JEMALLOC_ATTR(malloc) cannot be used due to: * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488 */ -JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) - (size_t size, int flags) { + JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)( + size_t size, int flags) { /* * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be * used here because it makes writing beyond the `size` @@ -3365,8 +3435,8 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) * up to `smallocx_return_t::size`. 
*/ smallocx_return_t ret; - static_opts_t sopts; - dynamic_opts_t dopts; + static_opts_t sopts; + dynamic_opts_t dopts; LOG("core.smallocx.entry", "size: %zu, flags: %d", size, flags); @@ -3395,16 +3465,16 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size); return ret; } -#undef JEMALLOC_SMALLOCX_CONCAT_HELPER -#undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 +# undef JEMALLOC_SMALLOCX_CONCAT_HELPER +# undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 #endif -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_mallocx(size_t size, int flags) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) + je_mallocx(size_t size, int flags) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); @@ -3429,8 +3499,8 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {size, flags}; - hook_invoke_alloc(hook_alloc_mallocx, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_mallocx, ret, (uintptr_t)ret, args); } LOG("core.mallocx.exit", "result: %p", ret); @@ -3456,8 +3526,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (sz_can_use_slab(usize)) { size_t bumped_usize = sz_sa2u(usize, alignment); p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, - bumped_usize, alignment, zero, /* slab */ false, - tcache, arena, hook_args); + bumped_usize, alignment, zero, /* slab */ false, tcache, + arena, hook_args); if (p == NULL) { return NULL; } @@ -3474,15 +3544,14 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, 
size_t usize, bool zero, tcache_t *tcache, - arena_t *arena, emap_alloc_ctx_t *alloc_ctx, - hook_ralloc_args_t *hook_args) { + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + emap_alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); - bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool prof_active = prof_active_get_unlocked(); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); - void *p; + void *p; if (unlikely(tctx != PROF_TCTX_SENTINEL)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, usize, alignment, zero, tcache, arena, tctx, hook_args); @@ -3495,19 +3564,19 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return NULL; } assert(usize == isalloc(tsd_tsdn(tsd), p)); - prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, - old_usize, &old_prof_info, sample_event); + prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, old_usize, + &old_prof_info, sample_event); return p; } static void * do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { - void *p; - tsd_t *tsd; - size_t usize; - size_t old_usize; - size_t alignment = MALLOCX_ALIGN_GET(flags); + void *p; + tsd_t *tsd; + size_t usize; + size_t old_usize; + size_t alignment = MALLOCX_ALIGN_GET(flags); arena_t *arena; assert(ptr != NULL); @@ -3523,13 +3592,13 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + 
emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3537,8 +3606,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } - hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, - flags, 0}}; + hook_ralloc_args_t hook_args = { + is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { p = irallocx_prof(tsd, ptr, old_usize, size, alignment, usize, zero, tcache, arena, &alloc_ctx, &hook_args); @@ -3563,7 +3632,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((byte_t *)p + old_usize); + void *excess_start = (void *)((byte_t *)p + old_usize); junk_alloc_callback(excess_start, excess_len); } @@ -3582,12 +3651,11 @@ label_oom: return NULL; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_rallocx(void *ptr, size_t size, int flags) { - LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, + flags); void *ret = do_rallocx(ptr, size, flags, false); LOG("core.rallocx.exit", "result: %p", ret); return ret; @@ -3621,7 +3689,8 @@ do_realloc_nonnull_zero(void *ptr) { check_entry_exit_locking(tsd_tsdn(tsd)); return NULL; } else { - safety_check_fail("Called realloc(non-null-ptr, 0) with " + safety_check_fail( + "Called realloc(non-null-ptr, 0) with " "zero_realloc:abort set\n"); /* In real code, this will never run; the safety check failure * will call abort. 
In the unit test, we just want to bail out @@ -3632,10 +3701,9 @@ do_realloc_nonnull_zero(void *ptr) { } } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (likely(ptr != NULL && size != 0)) { @@ -3650,7 +3718,7 @@ je_realloc(void *ptr, size_t size) { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; static_opts_init(&sopts); @@ -3668,8 +3736,8 @@ je_realloc(void *ptr, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {(uintptr_t)ptr, size}; - hook_invoke_alloc(hook_alloc_realloc, ret, - (uintptr_t)ret, args); + hook_invoke_alloc( + hook_alloc_realloc, ret, (uintptr_t)ret, args); } LOG("core.realloc.exit", "result: %p", ret); return ret; @@ -3681,8 +3749,8 @@ ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t newsize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero, - &newsize)) { + if (ixalloc( + tsdn, ptr, old_usize, size, extra, alignment, zero, &newsize)) { return old_usize; } @@ -3697,8 +3765,8 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, return old_usize; } - return ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); + return ixallocx_helper( + tsdn, ptr, old_usize, size, extra, alignment, zero); } JEMALLOC_ALWAYS_INLINE size_t @@ -3718,8 +3786,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_realloc() will use the actual usize to decide whether to sample. 
*/ size_t usize_max; - if (aligned_usize_get(size + extra, alignment, &usize_max, NULL, - false)) { + if (aligned_usize_get( + size + extra, alignment, &usize_max, NULL, false)) { /* * usize_max is out of range, and chances are that allocation * will fail, but use the maximum possible value and carry on @@ -3758,10 +3826,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * to edata has already been done. */ emap_alloc_ctx_t new_alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &new_alloc_ctx); - prof_info_get_and_reset_recent(tsd, ptr, &new_alloc_ctx, - &prof_info); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &new_alloc_ctx); + prof_info_get_and_reset_recent( + tsd, ptr, &new_alloc_ctx, &prof_info); assert(usize <= usize_max); sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, @@ -3777,10 +3845,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " - "flags: %d", ptr, size, extra, flags); + LOG("core.xallocx.entry", + "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", + ptr, size, extra, flags); assert(ptr != NULL); assert(size != 0); @@ -3794,12 +3864,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). 
*/ - edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *old_edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3841,17 +3911,17 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); - if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && - !zero) { + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize + && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((byte_t *)ptr + old_usize); + void *excess_start = (void *)((byte_t *)ptr + old_usize); junk_alloc_callback(excess_start, excess_len); } label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; - hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, - usize, (uintptr_t)usize, args); + hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, usize, + (uintptr_t)usize, args); } UTRACE(ptr, size, ptr); @@ -3862,9 +3932,8 @@ label_not_resized: } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) { - size_t usize; +JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { + size_t usize; tsdn_t *tsdn; LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); @@ -3896,10 +3965,10 @@ je_dallocx(void *ptr, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch_min(); - bool fast = tsd_fast(tsd); + bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); 
tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, /* is_alloc */ false); @@ -3933,11 +4002,11 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch_min(); - bool fast = tsd_fast(tsd); + bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); check_entry_exit_locking(tsd_tsdn(tsd)); - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, /* is_alloc */ false); @@ -3955,8 +4024,8 @@ sdallocx_default(void *ptr, size_t size, int flags) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, + flags); je_sdallocx_impl(ptr, size, flags); @@ -3964,9 +4033,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -JEMALLOC_ATTR(pure) -je_nallocx(size_t size, int flags) { - size_t usize; +JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; tsdn_t *tsdn; assert(size != 0); @@ -3991,9 +4059,9 @@ je_nallocx(size_t size, int flags) { } JEMALLOC_EXPORT int JEMALLOC_NOTHROW -je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +je_mallctl( + const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; tsd_t *tsd; LOG("core.mallctl.entry", "name: %s", name); @@ -4034,8 +4102,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { - int ret; + void *newp, size_t newlen) { + int ret; tsd_t *tsd; LOG("core.mallctlbymib.entry", ""); @@ -4055,8 +4123,8 @@ je_mallctlbymib(const size_t *mib, 
size_t miblen, void *oldp, size_t *oldlenp, #define STATS_PRINT_BUFSIZE 65536 JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) { +je_malloc_stats_print( + void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; LOG("core.malloc_stats_print.entry", ""); @@ -4127,12 +4195,12 @@ je_malloc_size(const void *ptr) { static void batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { assert(config_prof && opt_prof); - bool prof_sample_event = te_prof_sample_event_lookahead(tsd, - batch * usize); + bool prof_sample_event = te_prof_sample_event_lookahead( + tsd, batch * usize); assert(!prof_sample_event); size_t surplus; - prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd, - (batch + 1) * usize, &surplus); + prof_sample_event = te_prof_sample_event_lookahead_surplus( + tsd, (batch + 1) * usize, &surplus); assert(prof_sample_event); assert(surplus < usize); } @@ -4157,14 +4225,14 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { goto label_done; } szind_t ind = sz_size2index(usize); - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); /* * The cache bin and arena will be lazily initialized; it's hard to * know in advance whether each of them needs to be initialized. */ cache_bin_t *bin = NULL; - arena_t *arena = NULL; + arena_t *arena = NULL; size_t nregs = 0; if (likely(ind < SC_NBINS)) { @@ -4175,10 +4243,10 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { while (filled < num) { size_t batch = num - filled; size_t surplus = SIZE_MAX; /* Dead store. 
*/ - bool prof_sample_event = config_prof && opt_prof + bool prof_sample_event = config_prof && opt_prof && prof_active_get_unlocked() - && te_prof_sample_event_lookahead_surplus(tsd, - batch * usize, &surplus); + && te_prof_sample_event_lookahead_surplus( + tsd, batch * usize, &surplus); if (prof_sample_event) { /* @@ -4194,8 +4262,8 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { if (likely(ind < SC_NBINS) && batch >= nregs) { if (arena == NULL) { unsigned arena_ind = mallocx_arena_get(flags); - if (arena_get_from_ind(tsd, arena_ind, - &arena)) { + if (arena_get_from_ind( + tsd, arena_ind, &arena)) { goto label_done; } if (arena == NULL) { @@ -4212,13 +4280,14 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { filled += n; } - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); - if (likely(tcache != NULL && - ind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(ind, &tcache->bins[ind], - tcache->tcache_slow)) && progress < batch) { + if (likely(tcache != NULL + && ind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled( + ind, &tcache->bins[ind], tcache->tcache_slow)) + && progress < batch) { if (bin == NULL) { bin = &tcache->bins[ind]; } @@ -4249,22 +4318,22 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { * additional benefit is that the tcache will * not be empty for the next allocation request. 
*/ - size_t n = cache_bin_alloc_batch(bin, bin_batch, - ptrs + filled); + size_t n = cache_bin_alloc_batch( + bin, bin_batch, ptrs + filled); if (config_stats) { bin->tstats.nrequests += n; } if (zero) { for (size_t i = 0; i < n; ++i) { - memset(ptrs[filled + i], 0, - usize); + memset( + ptrs[filled + i], 0, usize); } } if (config_prof && opt_prof && unlikely(ind >= SC_NBINS)) { for (size_t i = 0; i < n; ++i) { - prof_tctx_reset_sampled(tsd, - ptrs[filled + i]); + prof_tctx_reset_sampled( + tsd, ptrs[filled + i]); } } progress += n; @@ -4340,7 +4409,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - tsd_t *tsd; + tsd_t *tsd; unsigned i, j, narenas; arena_t *arena; @@ -4370,8 +4439,8 @@ _malloc_prefork(void) /* Break arena prefork into stages to preserve lock order. */ for (i = 0; i < 9; i++) { for (j = 0; j < narenas; j++) { - if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != - NULL) { + if ((arena = arena_get(tsd_tsdn(tsd), j, false)) + != NULL) { switch (i) { case 0: arena_prefork0(tsd_tsdn(tsd), arena); @@ -4400,11 +4469,11 @@ _malloc_prefork(void) case 8: arena_prefork8(tsd_tsdn(tsd), arena); break; - default: not_reached(); + default: + not_reached(); } } } - } prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); @@ -4419,7 +4488,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - tsd_t *tsd; + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -4454,7 +4523,7 @@ _malloc_postfork(void) void jemalloc_postfork_child(void) { - tsd_t *tsd; + tsd_t *tsd; unsigned i, narenas; assert(malloc_initialized()); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index fffd6aee..4e838d3b 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -24,45 +24,52 @@ extern "C" { // // ... but it needs to work with jemalloc namespaces. 
-void *operator new(std::size_t size); -void *operator new[](std::size_t size); -void *operator new(std::size_t size, const std::nothrow_t &) noexcept; -void *operator new[](std::size_t size, const std::nothrow_t &) noexcept; -void operator delete(void *ptr) noexcept; -void operator delete[](void *ptr) noexcept; -void operator delete(void *ptr, const std::nothrow_t &) noexcept; -void operator delete[](void *ptr, const std::nothrow_t &) noexcept; +void *operator new(std::size_t size); +void *operator new[](std::size_t size); +void *operator new(std::size_t size, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, const std::nothrow_t &) noexcept; +void operator delete(void *ptr) noexcept; +void operator delete[](void *ptr) noexcept; +void operator delete(void *ptr, const std::nothrow_t &) noexcept; +void operator delete[](void *ptr, const std::nothrow_t &) noexcept; #if __cpp_sized_deallocation >= 201309 /* C++14's sized-delete operators. */ -void operator delete(void *ptr, std::size_t size) noexcept; -void operator delete[](void *ptr, std::size_t size) noexcept; +void operator delete(void *ptr, std::size_t size) noexcept; +void operator delete[](void *ptr, std::size_t size) noexcept; #endif #if __cpp_aligned_new >= 201606 /* C++17's over-aligned operators. 
*/ -void *operator new(std::size_t size, std::align_val_t); -void *operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; -void *operator new[](std::size_t size, std::align_val_t); -void *operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete(void* ptr, std::align_val_t) noexcept; -void operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; -void operator delete[](void* ptr, std::align_val_t) noexcept; -void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept; +void *operator new(std::size_t size, std::align_val_t); +void *operator new( + std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, std::align_val_t); +void *operator new[]( + std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void *ptr, std::align_val_t) noexcept; +void operator delete( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void *ptr, std::size_t size, std::align_val_t al) noexcept; +void operator delete[](void *ptr, std::align_val_t) noexcept; +void operator delete[]( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete[]( + void *ptr, std::size_t size, std::align_val_t al) noexcept; #endif JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { if (opt_experimental_infallible_new) { - const char *huge_warning = (size >= ((std::size_t)1 << 30)) ? - "This may be caused by heap corruption, if the large size " - "is unexpected (suggest building with sanitizers for " - "debugging)." : ""; + const char *huge_warning = (size >= ((std::size_t)1 << 30)) + ? 
"This may be caused by heap corruption, if the large size " + "is unexpected (suggest building with sanitizers for " + "debugging)." + : ""; - safety_check_fail(": Allocation of size %zu failed. " + safety_check_fail( + ": Allocation of size %zu failed. " "%s opt.experimental_infallible_new is true. Aborting.\n", size, huge_warning); return nullptr; @@ -74,7 +81,7 @@ handleOOM(std::size_t size, bool nothrow) { std::new_handler handler; // GCC-4.8 and clang 4.0 do not have std::get_new_handler. { - static std::mutex mtx; + static std::mutex mtx; std::lock_guard lock(mtx); handler = std::set_new_handler(nullptr); @@ -98,8 +105,7 @@ handleOOM(std::size_t size, bool nothrow) { } template -JEMALLOC_NOINLINE -static void * +JEMALLOC_NOINLINE static void * fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { @@ -109,12 +115,11 @@ fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { } template -JEMALLOC_ALWAYS_INLINE -void * +JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { LOG("core.operator_new.entry", "size: %zu", size); - void * ret = imalloc_fastpath(size, &fallbackNewImpl); + void *ret = imalloc_fastpath(size, &fallbackNewImpl); LOG("core.operator_new.exit", "result: %p", ret); return ret; @@ -143,9 +148,9 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { #if __cpp_aligned_new >= 201606 template -JEMALLOC_ALWAYS_INLINE -void * -alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { +JEMALLOC_ALWAYS_INLINE void * +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept( + IsNoExcept) { void *ptr = je_aligned_alloc(static_cast(alignment), size); if (likely(ptr != nullptr)) { return ptr; @@ -165,16 +170,18 @@ operator new[](std::size_t size, std::align_val_t alignment) { } void * -operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { +operator new(std::size_t size, 
std::align_val_t alignment, + const std::nothrow_t &) noexcept { return alignedNewImpl(size, alignment); } void * -operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { +operator new[](std::size_t size, std::align_val_t alignment, + const std::nothrow_t &) noexcept { return alignedNewImpl(size, alignment); } -#endif // __cpp_aligned_new +#endif // __cpp_aligned_new void operator delete(void *ptr) noexcept { @@ -203,7 +210,8 @@ operator delete(void *ptr, const std::nothrow_t &) noexcept { LOG("core.operator_delete.exit", ""); } -void operator delete[](void *ptr, const std::nothrow_t &) noexcept { +void +operator delete[](void *ptr, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -215,7 +223,7 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { JEMALLOC_ALWAYS_INLINE void -sizedDeleteImpl(void* ptr, std::size_t size) noexcept { +sizedDeleteImpl(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } @@ -236,14 +244,14 @@ operator delete[](void *ptr, std::size_t size) noexcept { sizedDeleteImpl(ptr, size); } -#endif // __cpp_sized_deallocation +#endif // __cpp_sized_deallocation #if __cpp_aligned_new >= 201606 JEMALLOC_ALWAYS_INLINE void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) - noexcept { +alignedSizedDeleteImpl( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { if (config_debug) { assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); } @@ -259,7 +267,7 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) } void -operator delete(void* ptr, std::align_val_t) noexcept { +operator delete(void *ptr, std::align_val_t) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -268,7 +276,7 @@ operator delete(void* ptr, std::align_val_t) noexcept { } void -operator delete[](void* ptr, std::align_val_t) 
noexcept { +operator delete[](void *ptr, std::align_val_t) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -277,7 +285,7 @@ operator delete[](void* ptr, std::align_val_t) noexcept { } void -operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete(void *ptr, std::align_val_t, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -286,7 +294,8 @@ operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { } void -operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete[]( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -295,14 +304,16 @@ operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { } void -operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +operator delete( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { alignedSizedDeleteImpl(ptr, size, alignment); } void -operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +operator delete[]( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { alignedSizedDeleteImpl(ptr, size, alignment); } -#endif // __cpp_aligned_new +#endif // __cpp_aligned_new // NOLINTEND(misc-use-anonymous-namespace) diff --git a/src/large.c b/src/large.c index d78085f0..7cae61ae 100644 --- a/src/large.c +++ b/src/large.c @@ -18,10 +18,10 @@ large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { } void * -large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero) { - size_t ausize; - edata_t *edata; +large_palloc( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + size_t ausize; + edata_t *edata; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); assert(!tsdn_null(tsdn) || 
arena != NULL); @@ -34,8 +34,10 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } - if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, - arena, usize, alignment, zero)) == NULL) { + if (unlikely(arena == NULL) + || (edata = arena_extent_alloc_large( + tsdn, arena, usize, alignment, zero)) + == NULL) { return NULL; } @@ -53,10 +55,10 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { - arena_t *arena = arena_get_from_edata(edata); + arena_t *arena = arena_get_from_edata(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t old_size = edata_size_get(edata); - size_t old_usize = edata_usize_get(edata); + size_t old_size = edata_size_get(edata); + size_t old_usize = edata_usize_get(edata); assert(old_usize > usize); @@ -80,8 +82,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { } static bool -large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, - bool zero) { +large_ralloc_no_move_expand( + tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); size_t old_size = edata_size_get(edata); @@ -112,10 +114,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * offset from the beginning of the extent is a multiple * of CACHELINE in [0 .. PAGE). 
*/ - void *zbase = (void *) - ((byte_t *)edata_addr_get(edata) + old_usize); - void *zpast = PAGE_ADDR2BASE((void *)((byte_t *)zbase + - PAGE)); + void *zbase = (void *)((byte_t *)edata_addr_get(edata) + + old_usize); + void *zpast = PAGE_ADDR2BASE( + (void *)((byte_t *)zbase + PAGE)); size_t nzero = (byte_t *)zpast - (byte_t *)zbase; assert(nzero > 0); memset(zbase, 0, nzero); @@ -134,19 +136,19 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= SC_LARGE_MINCLASS - && usize_max >= SC_LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS); if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ - if (!large_ralloc_no_move_expand(tsdn, edata, usize_max, - zero)) { + if (!large_ralloc_no_move_expand( + tsdn, edata, usize_max, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Try again, this time with usize_min. */ - if (usize_min < usize_max && usize_min > oldusize && - large_ralloc_no_move_expand(tsdn, edata, usize_min, zero)) { + if (usize_min < usize_max && usize_min > oldusize + && large_ralloc_no_move_expand( + tsdn, edata, usize_min, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } @@ -172,8 +174,8 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, } static void * -large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero) { +large_ralloc_move_helper( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= CACHELINE) { return large_malloc(tsdn, arena, usize, zero); } @@ -190,14 +192,13 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, /* The following should have been caught by callers. 
*/ assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= SC_LARGE_MINCLASS - && usize >= SC_LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, edata, usize, usize, zero)) { - hook_invoke_expand(hook_args->is_realloc - ? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, - usize, (uintptr_t)ptr, hook_args->args); + hook_invoke_expand(hook_args->is_realloc ? hook_expand_realloc + : hook_expand_rallocx, + ptr, oldusize, usize, (uintptr_t)ptr, hook_args->args); return edata_addr_get(edata); } @@ -206,17 +207,18 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. */ - void *ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, - zero); + void *ret = large_ralloc_move_helper( + tsdn, arena, usize, alignment, zero); if (ret == NULL) { return NULL; } - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx, + ret, (uintptr_t)ret, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); size_t copysize = (usize < oldusize) ? usize : oldusize; memcpy(ret, edata_addr_get(edata), copysize); @@ -228,8 +230,8 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * locked indicates whether the arena's large_mtx is currently held. 
*/ static void -large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - bool locked) { +large_dalloc_prep_impl( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool locked) { if (!locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { @@ -280,16 +282,16 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) { } void -large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, - bool reset_recent) { +large_prof_info_get( + tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { assert(prof_info != NULL); prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; if (prof_tctx_is_valid(alloc_tctx)) { - nstime_copy(&prof_info->alloc_time, - edata_prof_alloc_time_get(edata)); + nstime_copy( + &prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_info->alloc_size = edata_prof_alloc_size_get(edata); if (reset_recent) { /* diff --git a/src/log.c b/src/log.c index 778902fb..9b1c6261 100644 --- a/src/log.c +++ b/src/log.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/log.h" -char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; atomic_b_t log_init_done = ATOMIC_INIT(false); /* @@ -11,7 +11,7 @@ atomic_b_t log_init_done = ATOMIC_INIT(false); * with a pointer to the first character after the end of the string. 
*/ static const char * -log_var_extract_segment(const char* segment_begin) { +log_var_extract_segment(const char *segment_begin) { const char *end; for (end = segment_begin; *end != '\0' && *end != '|'; end++) { } @@ -30,12 +30,12 @@ log_var_matches_segment(const char *segment_begin, const char *segment_end, if (segment_len == 1 && *segment_begin == '.') { return true; } - if (segment_len == log_var_len) { + if (segment_len == log_var_len) { return strncmp(segment_begin, log_var_begin, segment_len) == 0; } else if (segment_len < log_var_len) { return strncmp(segment_begin, log_var_begin, segment_len) == 0 && log_var_begin[segment_len] == '.'; - } else { + } else { return false; } } @@ -61,9 +61,9 @@ log_var_update_state(log_var_t *log_var) { segment_begin); assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); if (log_var_matches_segment(segment_begin, segment_end, - log_var_begin, log_var_end)) { - atomic_store_u(&log_var->state, LOG_ENABLED, - ATOMIC_RELAXED); + log_var_begin, log_var_end)) { + atomic_store_u( + &log_var->state, LOG_ENABLED, ATOMIC_RELAXED); return LOG_ENABLED; } if (*segment_end == '\0') { diff --git a/src/malloc_io.c b/src/malloc_io.c index 192d8208..0c5d6c03 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -5,63 +5,68 @@ #include "jemalloc/internal/util.h" #ifdef assert -# undef assert +# undef assert #endif #ifdef not_reached -# undef not_reached +# undef not_reached #endif #ifdef not_implemented -# undef not_implemented +# undef not_implemented #endif #ifdef assert_not_implemented -# undef assert_not_implemented +# undef assert_not_implemented #endif /* * Define simple versions of assertion macros that won't recurse in case * of assertion failures in malloc_*printf(). 
*/ -#define assert(e) do { \ - if (config_debug && !(e)) { \ - malloc_write(": Failed assertion\n"); \ - abort(); \ - } \ -} while (0) +#define assert(e) \ + do { \ + if (config_debug && !(e)) { \ + malloc_write(": Failed assertion\n"); \ + abort(); \ + } \ + } while (0) -#define not_reached() do { \ - if (config_debug) { \ - malloc_write(": Unreachable code reached\n"); \ - abort(); \ - } \ - unreachable(); \ -} while (0) +#define not_reached() \ + do { \ + if (config_debug) { \ + malloc_write( \ + ": Unreachable code reached\n"); \ + abort(); \ + } \ + unreachable(); \ + } while (0) -#define not_implemented() do { \ - if (config_debug) { \ - malloc_write(": Not implemented\n"); \ - abort(); \ - } \ -} while (0) +#define not_implemented() \ + do { \ + if (config_debug) { \ + malloc_write(": Not implemented\n"); \ + abort(); \ + } \ + } while (0) -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) { \ - not_implemented(); \ - } \ -} while (0) +#define assert_not_implemented(e) \ + do { \ + if (unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ + } while (0) /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ #define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) -static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, - size_t *slen_p); +static char *u2s( + uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); #define D2S_BUFSIZE (1 + U2S_BUFSIZE) static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); #define O2S_BUFSIZE (1 + U2S_BUFSIZE) static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); #define X2S_BUFSIZE (2 + U2S_BUFSIZE) -static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, - size_t *slen_p); +static char *x2s( + uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p); /******************************************************************************/ @@ -71,7 +76,7 @@ wrtmessage(void *cbopaque, const char *s) { malloc_write_fd(STDERR_FILENO, s, strlen(s)); } -JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); /* * Wrapper around malloc_message() that avoids the need for @@ -93,14 +98,15 @@ malloc_write(const char *s) { int buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, (DWORD)buflen, NULL); + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, + (DWORD)buflen, NULL); return 0; -#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) \ + && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; + buf[buflen - 1] = '\0'; } return 0; #else @@ -110,9 +116,9 @@ buferror(int err, char *buf, size_t buflen) { uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { - uintmax_t ret, digit; - unsigned b; - bool neg; + uintmax_t ret, digit; + unsigned b; + bool neg; const char *p, *ns; p = nptr; @@ 
-128,7 +134,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { neg = false; while (true) { switch (*p) { - case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': p++; break; case '-': @@ -142,8 +153,8 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { } } - /* Get prefix, if any. */ - label_prefix: +/* Get prefix, if any. */ +label_prefix: /* * Note where the first non-whitespace/sign character is so that it is * possible to tell whether any digits are consumed (e.g., " 0" vs. @@ -152,8 +163,14 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { ns = p; if (*p == '0') { switch (p[1]) { - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': if (b == 0) { b = 8; } @@ -161,13 +178,30 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { p++; } break; - case 'X': case 'x': + case 'X': + case 'x': switch (p[2]) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'A': case 'B': case 'C': case 'D': case 'E': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': - case 'a': case 'b': case 'c': case 'd': case 'e': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': if (b == 0) { b = 16; @@ -244,9 +278,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { } while (x > 0); break; case 16: { - const char *digits = (uppercase) - ? "0123456789ABCDEF" - : "0123456789abcdef"; + const char *digits = (uppercase) ? 
"0123456789ABCDEF" + : "0123456789abcdef"; do { i--; @@ -254,7 +287,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { x >>= 4; } while (x > 0); break; - } default: { + } + default: { const char *digits = (uppercase) ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "0123456789abcdefghijklmnopqrstuvwxyz"; @@ -265,7 +299,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { s[i] = digits[x % (uint64_t)base]; x /= (uint64_t)base; } while (x > 0); - }} + } + } *slen_p = U2S_BUFSIZE - 1 - i; return &s[i]; @@ -294,7 +329,8 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) { (*slen_p)++; *s = sign; break; - default: not_reached(); + default: + not_reached(); } return s; } @@ -325,106 +361,112 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { JEMALLOC_COLD size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - size_t i; + size_t i; const char *f; -#define APPEND_C(c) do { \ - if (i < size) { \ - str[i] = (c); \ - } \ - i++; \ -} while (0) -#define APPEND_S(s, slen) do { \ - if (i < size) { \ - size_t cpylen = (slen <= size - i) ? slen : size - i; \ - memcpy(&str[i], s, cpylen); \ - } \ - i += slen; \ -} while (0) -#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ - /* Left padding. */ \ - size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ - (size_t)width - slen : 0); \ - if (!left_justify && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) { \ - if (pad_zero) { \ - APPEND_C('0'); \ - } else { \ - APPEND_C(' '); \ - } \ - } \ - } \ - /* Value. */ \ - APPEND_S(s, slen); \ - /* Right padding. */ \ - if (left_justify && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) { \ - APPEND_C(' '); \ - } \ - } \ -} while (0) -#define GET_ARG_NUMERIC(val, len) do { \ - switch ((unsigned char)len) { \ - case '?': \ - val = va_arg(ap, int); \ - break; \ - case '?' 
| 0x80: \ - val = va_arg(ap, unsigned int); \ - break; \ - case 'l': \ - val = va_arg(ap, long); \ - break; \ - case 'l' | 0x80: \ - val = va_arg(ap, unsigned long); \ - break; \ - case 'q': \ - val = va_arg(ap, long long); \ - break; \ - case 'q' | 0x80: \ - val = va_arg(ap, unsigned long long); \ - break; \ - case 'j': \ - val = va_arg(ap, intmax_t); \ - break; \ - case 'j' | 0x80: \ - val = va_arg(ap, uintmax_t); \ - break; \ - case 't': \ - val = va_arg(ap, ptrdiff_t); \ - break; \ - case 'z': \ - val = va_arg(ap, ssize_t); \ - break; \ - case 'z' | 0x80: \ - val = va_arg(ap, size_t); \ - break; \ - case 'p': /* Synthetic; used for %p. */ \ - val = va_arg(ap, uintptr_t); \ - break; \ - default: \ - not_reached(); \ - val = 0; \ - } \ -} while (0) +#define APPEND_C(c) \ + do { \ + if (i < size) { \ + str[i] = (c); \ + } \ + i++; \ + } while (0) +#define APPEND_S(s, slen) \ + do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ + } while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) \ + do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) \ + ? 0 \ + : ((slen < (size_t)width) ? (size_t)width - slen : 0); \ + if (!left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) { \ + if (pad_zero) { \ + APPEND_C('0'); \ + } else { \ + APPEND_C(' '); \ + } \ + } \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) { \ + APPEND_C(' '); \ + } \ + } \ + } while (0) +#define GET_ARG_NUMERIC(val, len) \ + do { \ + switch ((unsigned char)len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case '?' 
| 0x80: \ + val = va_arg(ap, unsigned int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'l' | 0x80: \ + val = va_arg(ap, unsigned long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'q' | 0x80: \ + val = va_arg(ap, unsigned long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 'j' | 0x80: \ + val = va_arg(ap, uintmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, ssize_t); \ + break; \ + case 'z' | 0x80: \ + val = va_arg(ap, size_t); \ + break; \ + case 'p': /* Synthetic; used for %p. */ \ + val = va_arg(ap, uintptr_t); \ + break; \ + default: \ + not_reached(); \ + val = 0; \ + } \ + } while (0) i = 0; f = format; while (true) { switch (*f) { - case '\0': goto label_out; + case '\0': + goto label_out; case '%': { - bool alt_form = false; - bool left_justify = false; - bool plus_space = false; - bool plus_plus = false; - int prec = -1; - int width = -1; + bool alt_form = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; unsigned char len = '?'; - char *s; - size_t slen; - bool pad_zero = false; + char *s; + size_t slen; + bool pad_zero = false; f++; /* Flags. */ @@ -446,12 +488,13 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(!plus_plus); plus_plus = true; break; - default: goto label_width; + default: + goto label_width; } f++; } - /* Width. */ - label_width: + /* Width. 
*/ + label_width: switch (*f) { case '*': width = va_arg(ap, int); @@ -464,16 +507,24 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { case '0': pad_zero = true; JEMALLOC_FALLTHROUGH; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { uintmax_t uwidth; set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || get_errno() != - ERANGE); + assert(uwidth != UINTMAX_MAX + || get_errno() != ERANGE); width = (int)uwidth; break; - } default: + } + default: break; } /* Width/precision separator. */ @@ -488,20 +539,29 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { prec = va_arg(ap, int); f++; break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { uintmax_t uprec; set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || get_errno() != - ERANGE); + assert(uprec != UINTMAX_MAX + || get_errno() != ERANGE); prec = (int)uprec; break; } - default: break; + default: + break; } - /* Length. */ - label_length: + /* Length. */ + label_length: switch (*f) { case 'l': f++; @@ -512,11 +572,15 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { len = 'l'; } break; - case 'q': case 'j': case 't': case 'z': + case 'q': + case 'j': + case 't': + case 'z': len = *f; f++; break; - default: break; + default: + break; } /* Conversion specifier. 
*/ switch (*f) { @@ -525,9 +589,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { APPEND_C(*f); f++; break; - case 'd': case 'i': { + case 'd': + case 'i': { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[D2S_BUFSIZE]; + char buf[D2S_BUFSIZE]; /* * Outputting negative, zero-padded numbers @@ -542,41 +607,48 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(!pad_zero); GET_ARG_NUMERIC(val, len); - s = d2s(val, (plus_plus ? '+' : (plus_space ? - ' ' : '-')), buf, &slen); + s = d2s(val, + (plus_plus ? '+' + : (plus_space ? ' ' : '-')), + buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'o': { + } + case 'o': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[O2S_BUFSIZE]; + char buf[O2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = o2s(val, alt_form, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'u': { + } + case 'u': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[U2S_BUFSIZE]; + char buf[U2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = u2s(val, 10, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'x': case 'X': { + } + case 'x': + case 'X': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[X2S_BUFSIZE]; + char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = x2s(val, alt_form, *f == 'X', buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'c': { + } + case 'c': { unsigned char val; - char buf[2]; + char buf[2]; assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); @@ -586,7 +658,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { APPEND_PADDED_S(buf, 1, width, left_justify); f++; break; - } case 's': + } + case 's': assert(len == '?' 
|| len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); @@ -596,23 +669,27 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { break; case 'p': { uintmax_t val; - char buf[X2S_BUFSIZE]; + char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, 'p'); s = x2s(val, true, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } default: not_reached(); + } + default: + not_reached(); } break; - } default: { + } + default: { APPEND_C(*f); f++; break; - }} + } + } } - label_out: +label_out: if (i < size) { str[i] = '\0'; } else { @@ -629,7 +706,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { JEMALLOC_FORMAT_PRINTF(3, 4) size_t malloc_snprintf(char *str, size_t size, const char *format, ...) { - size_t ret; + size_t ret; va_list ap; va_start(ap, format); @@ -640,8 +717,8 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) { } void -malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, - va_list ap) { +malloc_vcprintf( + write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap) { char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { @@ -650,8 +727,8 @@ malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = (je_malloc_message != NULL) ? je_malloc_message : - wrtmessage; + write_cb = (je_malloc_message != NULL) ? 
je_malloc_message + : wrtmessage; } malloc_vsnprintf(buf, sizeof(buf), format, ap); diff --git a/src/mutex.c b/src/mutex.c index 5655100d..aa2ab665 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/spin.h" #if defined(_WIN32) && !defined(_CRT_SPINCOUNT) -#define _CRT_SPINCOUNT 4000 +# define _CRT_SPINCOUNT 4000 #endif /* @@ -22,8 +22,8 @@ int64_t opt_mutex_max_spin = 600; bool isthreaded = false; #endif #ifdef JEMALLOC_MUTEX_INIT_CB -static bool postpone_init = true; -static malloc_mutex_t *postponed_mutexes = NULL; +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; #endif /******************************************************************************/ @@ -44,14 +44,14 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ #ifdef JEMALLOC_MUTEX_INIT_CB -JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, - void *(calloc_cb)(size_t, size_t)); +JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb( + pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); #endif void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - nstime_t before; + nstime_t before; if (ncpus == 1) { goto label_spin_done; @@ -61,7 +61,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { do { spin_cpu_spinwait(); if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) - && !malloc_mutex_trylock_final(mutex)) { + && !malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; } @@ -77,8 +77,9 @@ label_spin_done: /* Copy before to after to avoid clock skews. */ nstime_t after; nstime_copy(&after, &before); - uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1, - ATOMIC_RELAXED) + 1; + uint32_t n_thds = atomic_fetch_add_u32( + &data->n_waiting_thds, 1, ATOMIC_RELAXED) + + 1; /* One last try as above two calls may take quite some cycles. 
*/ if (!malloc_mutex_trylock_final(mutex)) { atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED); @@ -137,27 +138,28 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1, } bool -malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank, malloc_mutex_lock_order_t lock_order) { +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank, + malloc_mutex_lock_order_t lock_order) { mutex_prof_data_init(&mutex->prof_data); #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 +# if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); -# else - if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, - _CRT_SPINCOUNT)) { +# else + if (!InitializeCriticalSectionAndSpinCount( + &mutex->lock, _CRT_SPINCOUNT)) { return true; } -# endif +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mutex->lock = OS_UNFAIR_LOCK_INIT; + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { mutex->postponed_next = postponed_mutexes; postponed_mutexes = mutex; } else { - if (_pthread_mutex_init_calloc_cb(&mutex->lock, - bootstrap_calloc) != 0) { + if (_pthread_mutex_init_calloc_cb( + &mutex->lock, bootstrap_calloc) + != 0) { return true; } } @@ -201,9 +203,10 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB malloc_mutex_unlock(tsdn, mutex); #else - if (malloc_mutex_init(mutex, mutex->witness.name, - mutex->witness.rank, mutex->lock_order)) { - malloc_printf(": Error re-initializing mutex in " + if (malloc_mutex_init(mutex, mutex->witness.name, mutex->witness.rank, + mutex->lock_order)) { + malloc_printf( + ": Error re-initializing mutex in " "child\n"); if (opt_abort) { abort(); @@ -217,8 +220,9 @@ malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB postpone_init = false; while (postponed_mutexes != NULL) { - if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - bootstrap_calloc) != 0) { + if (_pthread_mutex_init_calloc_cb( + 
&postponed_mutexes->lock, bootstrap_calloc) + != 0) { return true; } postponed_mutexes = postponed_mutexes->postponed_next; diff --git a/src/nstime.c b/src/nstime.c index 894753aa..ee2ddc51 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -5,8 +5,8 @@ #include "jemalloc/internal/assert.h" -#define BILLION UINT64_C(1000000000) -#define MILLION UINT64_C(1000000) +#define BILLION UINT64_C(1000000000) +#define MILLION UINT64_C(1000000) static void nstime_set_initialized(nstime_t *time) { @@ -22,8 +22,8 @@ nstime_assert_initialized(const nstime_t *time) { * Some parts (e.g. stats) rely on memset to zero initialize. Treat * these as valid initialization. */ - assert(time->magic == NSTIME_MAGIC || - (time->magic == 0 && time->ns == 0)); + assert( + time->magic == NSTIME_MAGIC || (time->magic == 0 && time->ns == 0)); #endif } @@ -133,8 +133,10 @@ nstime_isubtract(nstime_t *time, uint64_t subtrahend) { void nstime_imultiply(nstime_t *time, uint64_t multiplier) { nstime_assert_initialized(time); - assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << - 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + assert( + (((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) + == 0) + || ((time->ns * multiplier) / multiplier == time->ns)); nstime_initialize_operand(time); time->ns *= multiplier; @@ -178,7 +180,7 @@ nstime_ms_since(const nstime_t *past) { } #ifdef _WIN32 -# define NSTIME_MONOTONIC false +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { FILETIME ft; @@ -190,7 +192,7 @@ nstime_get(nstime_t *time) { nstime_init(time, ticks_100ns * 100); } #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; @@ -199,7 +201,7 @@ nstime_get(nstime_t *time) { nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC) -# define NSTIME_MONOTONIC true +# define 
NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; @@ -208,24 +210,24 @@ nstime_get(nstime_t *time) { nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif defined(JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { nstime_init(time, clock_gettime_nsec_np(CLOCK_UPTIME_RAW)); } #elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { static mach_timebase_info_data_t sTimebaseInfo; if (sTimebaseInfo.denom == 0) { - (void) mach_timebase_info(&sTimebaseInfo); + (void)mach_timebase_info(&sTimebaseInfo); } - nstime_init(time, mach_absolute_time() * sTimebaseInfo.numer - / sTimebaseInfo.denom); + nstime_init(time, + mach_absolute_time() * sTimebaseInfo.numer / sTimebaseInfo.denom); } #else -# define NSTIME_MONOTONIC false +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { struct timeval tv; @@ -242,15 +244,13 @@ nstime_monotonic_impl(void) { } nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; -prof_time_res_t opt_prof_time_res = - prof_time_res_default; +prof_time_res_t opt_prof_time_res = prof_time_res_default; const char *const prof_time_res_mode_names[] = { - "default", - "high", + "default", + "high", }; - static void nstime_get_realtime(nstime_t *time) { #if defined(JEMALLOC_HAVE_CLOCK_REALTIME) && !defined(_WIN32) @@ -302,5 +302,3 @@ nstime_prof_init_update(nstime_t *time) { nstime_init_zero(time); nstime_prof_update(time); } - - diff --git a/src/pa.c b/src/pa.c index 7a24ae65..becf69b1 100644 --- a/src/pa.c +++ b/src/pa.c @@ -41,8 +41,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, pac_oversize_threshold, dirty_decay_ms, muzzy_decay_ms, - &stats->pac_stats, stats_mtx)) { + cur_time, 
pac_oversize_threshold, dirty_decay_ms, + muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } @@ -68,11 +68,11 @@ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, - shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { + shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, - hpa_sec_opts)) { + hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; @@ -114,16 +114,16 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { - return (edata_pai_get(edata) == EXTENT_PAI_PAC - ? &shard->pac.pai : &shard->hpa_sec.pai); + return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai + : &shard->hpa_sec.pai); } edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, bool *deferred_work_generated) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; @@ -190,8 +190,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_shrink(tsdn, pai, edata, old_size, new_size, - deferred_work_generated); + bool error = pai_shrink( + tsdn, pai, edata, old_size, new_size, deferred_work_generated); if (error) { return true; } @@ -232,11 +232,11 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { } void -pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, - bool deferral_allowed) { +pa_shard_set_deferral_allowed( + tsdn_t *tsdn, pa_shard_t *shard, bool 
deferral_allowed) { if (pa_shard_uses_hpa(shard)) { - hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, - deferral_allowed); + hpa_shard_set_deferral_allowed( + tsdn, &shard->hpa_shard, deferral_allowed); } } @@ -260,8 +260,8 @@ pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { } if (pa_shard_uses_hpa(shard)) { - uint64_t hpa = - pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); + uint64_t hpa = pai_time_until_deferred_work( + tsdn, &shard->hpa_shard.pai); if (hpa < time) { time = hpa; } diff --git a/src/pa_extra.c b/src/pa_extra.c index 76507039..7c2498b7 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -94,8 +94,8 @@ pa_shard_nmuzzy(pa_shard_t *shard) { } void -pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, - size_t *nmuzzy) { +pa_shard_basic_stats_merge( + pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nactive += pa_shard_nactive(shard); *ndirty += pa_shard_ndirty(shard); *nmuzzy += pa_shard_nmuzzy(shard); @@ -122,29 +122,29 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.npurge)); + &shard->pac.stats->decay_dirty.npurge)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.nmadvise)); + &shard->pac.stats->decay_dirty.nmadvise)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.purged)); + &shard->pac.stats->decay_dirty.purged)); /* Muzzy decay stats */ locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.npurge)); + 
&shard->pac.stats->decay_muzzy.npurge)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.nmadvise)); + &shard->pac.stats->decay_muzzy.nmadvise)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.purged)); + &shard->pac.stats->decay_muzzy.purged)); atomic_load_add_store_zu(&pa_shard_stats_out->pac_stats.abandoned_vm, atomic_load_zu(&shard->pac.stats->abandoned_vm, ATOMIC_RELAXED)); @@ -157,8 +157,8 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, retained = ecache_nextents_get(&shard->pac.ecache_retained, i); dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i); muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, - i); + retained_bytes = ecache_nbytes_get( + &shard->pac.ecache_retained, i); estats_out[i].ndirty = dirty; estats_out[i].nmuzzy = muzzy; diff --git a/src/pac.c b/src/pac.c index 0e435717..361816e9 100644 --- a/src/pac.c +++ b/src/pac.c @@ -7,18 +7,18 @@ static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); +static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t 
*edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void pac_dalloc_impl( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static inline void -pac_decay_data_get(pac_t *pac, extent_state_t state, - decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { - switch(state) { +pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, + pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { + switch (state) { case extent_state_dirty: *r_decay = &pac->decay_dirty; *r_decay_stats = &pac->stats->decay_dirty; @@ -51,7 +51,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * merging/splitting extents is non-trivial. */ if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind, - /* delay_coalesce */ true)) { + /* delay_coalesce */ true)) { return true; } /* @@ -59,7 +59,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * the critical path much less often than for dirty extents. */ if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind, - /* delay_coalesce */ false)) { + /* delay_coalesce */ false)) { return true; } /* @@ -68,17 +68,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, - ind, /* delay_coalesce */ false)) { + if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, ind, + /* delay_coalesce */ false)) { return true; } exp_grow_init(&pac->exp_grow); if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } - atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold, - ATOMIC_RELAXED); + atomic_store_zu( + &pac->oversize_threshold, pac_oversize_threshold, ATOMIC_RELAXED); if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; } @@ -112,7 +112,8 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } -static size_t pac_alloc_retained_batched_size(size_t size) { +static size_t +pac_alloc_retained_batched_size(size_t size) { if (size > SC_LARGE_MAXCLASS) { /* * A valid input with usize SC_LARGE_MAXCLASS could still @@ -124,8 +125,8 @@ static size_t pac_alloc_retained_batched_size(size_t size) { } size_t batched_size = sz_s2u_compute_using_delta(size); size_t next_hugepage_size = HUGEPAGE_CEILING(size); - return batched_size > next_hugepage_size? next_hugepage_size: - batched_size; + return batched_size > next_hugepage_size ? next_hugepage_size + : batched_size; } static edata_t * @@ -162,8 +163,8 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, * limits. This choice should be reevaluated if * pac_alloc_retained_batched_size is changed to be more aggressive. 
*/ - if (sz_large_size_classes_disabled() && edata == NULL && - (maps_coalesce || opt_retain)) { + if (sz_large_size_classes_disabled() && edata == NULL + && (maps_coalesce || opt_retain)) { size_t batched_size = pac_alloc_retained_batched_size(size); /* * Note that ecache_alloc_grow will try to retrieve virtual @@ -173,12 +174,12 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, * with opt_retain off. */ edata = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, NULL, batched_size, - alignment, zero, guarded); + &pac->ecache_retained, NULL, batched_size, alignment, zero, + guarded); if (edata != NULL && batched_size > size) { - edata_t *trail = extent_split_wrapper(tsdn, pac, - ehooks, edata, size, batched_size - size, + edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, + edata, size, batched_size - size, /* holding_core_locks */ false); if (trail == NULL) { ecache_dalloc(tsdn, pac, ehooks, @@ -203,8 +204,8 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } if (config_stats && newly_mapped_size != 0) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, - newly_mapped_size, ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->pac_mapped, newly_mapped_size, ATOMIC_RELAXED); } return edata; @@ -217,8 +218,8 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata_t *edata; if (san_bump_enabled() && frequent_reuse) { - edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size, - zero); + edata = san_bump_alloc( + tsdn, &pac->sba, pac, ehooks, size, zero); } else { size_t size_with_guards = san_two_side_guarded_sz(size); /* Alloc a non-guarded extent first.*/ @@ -227,12 +228,12 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (edata != NULL) { /* Add guards around it. 
*/ assert(edata_size_get(edata) == size_with_guards); - san_guard_pages_two_sided(tsdn, ehooks, edata, - pac->emap, true); + san_guard_pages_two_sided( + tsdn, ehooks, edata, pac->emap, true); } } - assert(edata == NULL || (edata_guarded_get(edata) && - edata_size_get(edata) == size)); + assert(edata == NULL + || (edata_guarded_get(edata) && edata_size_get(edata) == size)); return edata; } @@ -241,7 +242,7 @@ static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = NULL; @@ -252,13 +253,13 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, * for such allocations would always return NULL. * */ if (!guarded || frequent_reuse) { - edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, - zero, guarded); + edata = pac_alloc_real( + tsdn, pac, ehooks, size, alignment, zero, guarded); } if (edata == NULL && guarded) { /* No cached guarded extents; creating a new one. 
*/ - edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, - alignment, zero, frequent_reuse); + edata = pac_alloc_new_guarded( + tsdn, pac, ehooks, size, alignment, zero, frequent_reuse); } return edata; @@ -267,7 +268,7 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); size_t mapped_add = 0; @@ -296,8 +297,8 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->pac_mapped, mapped_add, ATOMIC_RELAXED); } return false; } @@ -305,7 +306,7 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); size_t shrink_amount = old_size - new_size; @@ -325,9 +326,9 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } static void -pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; +pac_dalloc_impl( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); if (edata_guarded_get(edata)) { @@ -344,10 +345,10 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, * guarded). 
*/ if (!edata_slab_get(edata) || !maps_coalesce) { - assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || - !maps_coalesce); - san_unguard_pages_two_sided(tsdn, ehooks, edata, - pac->emap); + assert(edata_size_get(edata) >= SC_LARGE_MINCLASS + || !maps_coalesce); + san_unguard_pages_two_sided( + tsdn, ehooks, edata, pac->emap); } } @@ -362,8 +363,8 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { /* Use minimal interval if decay is contended. */ return BACKGROUND_THREAD_DEFERRED_MIN; } - uint64_t result = decay_ns_until_purge(decay, npages, - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + uint64_t result = decay_ns_until_purge( + decay, npages, ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); malloc_mutex_unlock(tsdn, &decay->mtx); return result; @@ -372,18 +373,16 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { uint64_t time; - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; - time = pac_ns_until_purge(tsdn, - &pac->decay_dirty, - ecache_npages_get(&pac->ecache_dirty)); + time = pac_ns_until_purge( + tsdn, &pac->decay_dirty, ecache_npages_get(&pac->ecache_dirty)); if (time == BACKGROUND_THREAD_DEFERRED_MIN) { return time; } - uint64_t muzzy = pac_ns_until_purge(tsdn, - &pac->decay_muzzy, - ecache_npages_get(&pac->ecache_muzzy)); + uint64_t muzzy = pac_ns_until_purge( + tsdn, &pac->decay_muzzy, ecache_npages_get(&pac->ecache_muzzy)); if (muzzy < time) { time = muzzy; } @@ -391,8 +390,8 @@ pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { } bool -pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit) { +pac_retain_grow_limit_get_set( + tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) { pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); if (new_limit != NULL) { size_t limit = *new_limit; @@ -418,15 +417,15 @@ static size_t pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, size_t 
npages_limit, size_t npages_decay_max, edata_list_inactive_t *result) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pac_ehooks_get(pac); /* Stash extents according to npages_limit. */ size_t nstashed = 0; while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache, - npages_limit); + edata_t *edata = ecache_evict( + tsdn, pac, ehooks, ecache, npages_limit); if (edata == NULL) { break; } @@ -443,8 +442,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { #ifndef JEMALLOC_HAVE_PROCESS_MADVISE return true; #else - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); size_t len = opt_process_madvise_max_batch; VARIABLE_ARRAY(struct iovec, vec, len); @@ -458,8 +457,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { total_bytes += pages_bytes; cur++; if (cur == len) { - bool err = pages_purge_process_madvise(vec, len, - total_bytes); + bool err = pages_purge_process_madvise( + vec, len, total_bytes); if (err) { return true; } @@ -489,14 +488,14 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; - bool purge_to_retained = !try_muzzy || - ecache->state == extent_state_muzzy; + bool purge_to_retained = !try_muzzy + || ecache->state == extent_state_muzzy; /* * Attempt process_madvise only if 1) enabled, 2) purging to retained, * and 3) not using custom hooks. 
*/ - bool try_process_madvise = (opt_process_madvise_max_batch > 0) && - purge_to_retained && ehooks_dalloc_will_fail(ehooks); + bool try_process_madvise = (opt_process_madvise_max_batch > 0) + && purge_to_retained && ehooks_dalloc_will_fail(ehooks); bool already_purged; if (try_process_madvise) { @@ -511,8 +510,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, already_purged = false; } - for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != - NULL; edata = edata_list_inactive_first(decay_extents)) { + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); @@ -524,8 +523,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, switch (ecache->state) { case extent_state_dirty: if (try_muzzy) { - err = extent_purge_lazy_wrapper(tsdn, ehooks, - edata, /* offset */ 0, size); + err = extent_purge_lazy_wrapper( + tsdn, ehooks, edata, /* offset */ 0, size); if (!err) { ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_muzzy, edata); @@ -535,8 +534,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, JEMALLOC_FALLTHROUGH; case extent_state_muzzy: if (already_purged) { - extent_dalloc_wrapper_purged(tsdn, pac, ehooks, - edata); + extent_dalloc_wrapper_purged( + tsdn, pac, ehooks, edata); } else { extent_dalloc_wrapper(tsdn, pac, ehooks, edata); } @@ -578,8 +577,8 @@ static void pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); if (decay->purging || npages_decay_max == 0) { return; @@ -589,8 +588,8 @@ pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 
edata_list_inactive_t decay_extents; edata_list_inactive_init(&decay_extents); - size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, - npages_decay_max, &decay_extents); + size_t npurge = pac_stash_decayed( + tsdn, pac, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = pac_decay_stashed(tsdn, pac, decay, decay_stats, ecache, fully_decay, &decay_extents); @@ -611,8 +610,8 @@ pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, static void pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, + size_t npages_limit) { if (current_npages > npages_limit) { pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, /* fully_decay */ false, npages_limit, @@ -647,8 +646,8 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, nstime_t time; nstime_init_update(&time); size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); + bool epoch_advanced = decay_maybe_advance_epoch( + decay, &time, npages_current); if (eagerness == PAC_PURGE_ALWAYS || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); @@ -662,9 +661,9 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness) { - decay_t *decay; + decay_t *decay; pac_decay_stats_t *decay_stats; - ecache_t *ecache; + ecache_t *ecache; pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); if (!decay_ms_valid(decay_ms)) { @@ -691,9 +690,9 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state) { - decay_t *decay; + decay_t *decay; 
pac_decay_stats_t *decay_stats; - ecache_t *ecache; + ecache_t *ecache; pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); return decay_ms_read(decay); } @@ -722,9 +721,10 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) { * dss-based extents for later reuse. */ ehooks_t *ehooks = pac_ehooks_get(pac); - edata_t *edata; - while ((edata = ecache_evict(tsdn, pac, ehooks, - &pac->ecache_retained, 0)) != NULL) { + edata_t *edata; + while ( + (edata = ecache_evict(tsdn, pac, ehooks, &pac->ecache_retained, 0)) + != NULL) { extent_destroy_wrapper(tsdn, pac, ehooks, edata); } } diff --git a/src/pages.c b/src/pages.c index d53e0fef..88301c2b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -8,46 +8,42 @@ #include "jemalloc/internal/malloc_io.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT -#include -#ifdef __FreeBSD__ -#include -#endif +# include +# ifdef __FreeBSD__ +# include +# endif #endif #ifdef __NetBSD__ -#include /* ilog2 */ +# include /* ilog2 */ #endif #ifdef JEMALLOC_HAVE_VM_MAKE_TAG -#define PAGES_FD_TAG VM_MAKE_TAG(254U) +# define PAGES_FD_TAG VM_MAKE_TAG(254U) #else -#define PAGES_FD_TAG -1 +# define PAGES_FD_TAG -1 #endif #if defined(JEMALLOC_HAVE_PRCTL) && defined(JEMALLOC_PAGEID) -#include -#ifndef PR_SET_VMA -#define PR_SET_VMA 0x53564d41 -#define PR_SET_VMA_ANON_NAME 0 -#endif +# include +# ifndef PR_SET_VMA +# define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 +# endif #endif /******************************************************************************/ /* Data. */ /* Actual operating system page size, detected during bootstrap, <= PAGE. 
*/ -size_t os_page; +size_t os_page; #ifndef _WIN32 -# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) -# define PAGES_PROT_DECOMMIT (PROT_NONE) -static int mmap_flags; +# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) +# define PAGES_PROT_DECOMMIT (PROT_NONE) +static int mmap_flags; #endif -static bool os_overcommits; +static bool os_overcommits; const char *const thp_mode_names[] = { - "default", - "always", - "never", - "not supported" -}; + "default", "always", "never", "not supported"}; thp_mode_t opt_thp = THP_MODE_DEFAULT; thp_mode_t init_system_thp_mode; @@ -66,15 +62,16 @@ static int madvise_dont_need_zeros_is_faulty = -1; * * [1]: https://patchwork.kernel.org/patch/10576637/ */ -static int madvise_MADV_DONTNEED_zeroes_pages(void) -{ +static int +madvise_MADV_DONTNEED_zeroes_pages(void) { size_t size = PAGE; - void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { - malloc_write(": Cannot allocate memory for " + malloc_write( + ": Cannot allocate memory for " "MADV_DONTNEED check\n"); if (opt_abort) { abort(); @@ -94,7 +91,8 @@ static int madvise_MADV_DONTNEED_zeroes_pages(void) } if (munmap(addr, size) != 0) { - malloc_write(": Cannot deallocate memory for " + malloc_write( + ": Cannot deallocate memory for " "MADV_DONTNEED check\n"); if (opt_abort) { abort(); @@ -106,18 +104,18 @@ static int madvise_MADV_DONTNEED_zeroes_pages(void) #endif #ifdef JEMALLOC_PAGEID -static int os_page_id(void *addr, size_t size, const char *name) -{ -#ifdef JEMALLOC_HAVE_PRCTL +static int +os_page_id(void *addr, size_t size, const char *name) { +# ifdef JEMALLOC_HAVE_PRCTL /* * While parsing `/proc//maps` file, the block could appear as * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` */ return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, (uintptr_t)name); -#else +# 
else return 0; -#endif +# endif } #endif @@ -156,7 +154,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { */ { int flags = mmap_flags; -#ifdef __NetBSD__ +# ifdef __NetBSD__ /* * On NetBSD PAGE for a platform is defined to the * maximum page size of all machine architectures @@ -167,7 +165,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { unsigned int a = ilog2(MAX(alignment, PAGE)); flags |= MAP_ALIGNED(a); } -#endif +# endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0); @@ -184,8 +182,8 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { ret = NULL; } #endif - assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && - ret == addr)); + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); #ifdef JEMALLOC_PAGEID int n = os_page_id(ret, size, os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); @@ -195,8 +193,8 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { } static void * -os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, - bool *commit) { +os_pages_trim( + void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit) { void *ret = (void *)((byte_t *)addr + leadsize); assert(alloc_size >= leadsize + size); @@ -237,13 +235,15 @@ os_pages_unmap(void *addr, size_t size) { char buf[BUFERROR_BUF]; buferror(get_errno(), buf, sizeof(buf)); - malloc_printf(": Error in " + malloc_printf( + ": Error in " #ifdef _WIN32 "VirtualFree" #else "munmap" #endif - "(): %s\n", buf); + "(): %s\n", + buf); if (opt_abort) { abort(); } @@ -350,13 +350,14 @@ os_pages_commit(void *addr, size_t size, bool commit) { assert(PAGE_CEILING(size) == size); #ifdef _WIN32 - return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, - PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); + return (commit + ? 
(addr != VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE)) + : (!VirtualFree(addr, size, MEM_DECOMMIT))); #else { - int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, - PAGES_FD_TAG, 0); + int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + void *result = mmap( + addr, size, prot, mmap_flags | MAP_FIXED, PAGES_FD_TAG, 0); if (result == MAP_FAILED) { return true; } @@ -395,8 +396,8 @@ pages_decommit(void *addr, size_t size) { void pages_mark_guards(void *head, void *tail) { assert(head != NULL || tail != NULL); - assert(head == NULL || tail == NULL || - (uintptr_t)head < (uintptr_t)tail); + assert( + head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT if (head != NULL) { mprotect(head, PAGE, PROT_NONE); @@ -418,13 +419,12 @@ pages_mark_guards(void *head, void *tail) { void pages_unmark_guards(void *head, void *tail) { assert(head != NULL || tail != NULL); - assert(head == NULL || tail == NULL || - (uintptr_t)head < (uintptr_t)tail); + assert( + head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - bool head_and_tail = (head != NULL) && (tail != NULL); - size_t range = head_and_tail ? - (uintptr_t)tail - (uintptr_t)head + PAGE : - SIZE_T_MAX; + bool head_and_tail = (head != NULL) && (tail != NULL); + size_t range = head_and_tail ? (uintptr_t)tail - (uintptr_t)head + PAGE + : SIZE_T_MAX; /* * The amount of work that the kernel does in mprotect depends on the * range argument. 
SC_LARGE_MINCLASS is an arbitrary threshold chosen @@ -473,17 +473,18 @@ pages_purge_lazy(void *addr, size_t size) { return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) return (madvise(addr, size, -# ifdef MADV_FREE - MADV_FREE -# else - JEMALLOC_MADV_FREE -# endif - ) != 0); -#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) + != 0); +#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); -#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ - !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) \ + && !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #else not_reached(); @@ -499,14 +500,14 @@ pages_purge_forced(void *addr, size_t size) { return true; } -#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - return (unlikely(madvise_dont_need_zeros_is_faulty) || - madvise(addr, size, MADV_DONTNEED) != 0); -#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) - return (unlikely(madvise_dont_need_zeros_is_faulty) || - posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); +#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) + return (unlikely(madvise_dont_need_zeros_is_faulty) + || madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (unlikely(madvise_dont_need_zeros_is_faulty) + || posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. 
*/ return pages_commit(addr, size); @@ -579,13 +580,13 @@ pages_collapse(void *addr, size_t size) { * means we can't call pages_collapse on freshly mapped memory region. * See madvise(2) man page for more details. */ -#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) && \ - (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) -# if defined(MADV_COLLAPSE) +#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) \ + && (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) +# if defined(MADV_COLLAPSE) return (madvise(addr, size, MADV_COLLAPSE) != 0); -# elif defined(JEMALLOC_MADV_COLLAPSE) +# elif defined(JEMALLOC_MADV_COLLAPSE) return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0); -# endif +# endif #else return true; #endif @@ -618,8 +619,8 @@ pages_dodump(void *addr, size_t size) { } #ifdef JEMALLOC_HAVE_PROCESS_MADVISE -#include -#include +# include +# include static int pidfd; static bool @@ -640,15 +641,16 @@ init_process_madvise(void) { return false; } -#ifdef SYS_process_madvise -#define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise -#else -#define JE_SYS_PROCESS_MADVISE_NR EXPERIMENTAL_SYS_PROCESS_MADVISE_NR -#endif +# ifdef SYS_process_madvise +# define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise +# else +# define JE_SYS_PROCESS_MADVISE_NR \ + EXPERIMENTAL_SYS_PROCESS_MADVISE_NR +# endif static bool -pages_purge_process_madvise_impl(void *vec, size_t vec_len, - size_t total_bytes) { +pages_purge_process_madvise_impl( + void *vec, size_t vec_len, size_t total_bytes) { size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); @@ -663,8 +665,8 @@ init_process_madvise(void) { } static bool -pages_purge_process_madvise_impl(void *vec, size_t vec_len, - size_t total_bytes) { +pages_purge_process_madvise_impl( + void *vec, size_t vec_len, size_t total_bytes) { not_reached(); return true; } @@ -700,11 +702,11 @@ os_page_detect(void) { #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool 
os_overcommits_sysctl(void) { - int vm_overcommit; + int vm_overcommit; size_t sz; sz = sizeof(vm_overcommit); -#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) +# if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) int mib[2]; mib[0] = CTL_VM; @@ -712,11 +714,11 @@ os_overcommits_sysctl(void) { if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } -#else +# else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } -#endif +# endif return ((vm_overcommit & 0x3) == 0); } @@ -730,17 +732,18 @@ os_overcommits_sysctl(void) { */ static bool os_overcommits_proc(void) { - int fd; + int fd; char buf[1]; -#if defined(O_CLOEXEC) - fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); -#else +# if defined(O_CLOEXEC) + fd = malloc_open( + "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); +# else fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY); if (fd != -1) { fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); } -#endif +# endif if (fd == -1) { return false; /* Error. 
*/ @@ -763,20 +766,20 @@ os_overcommits_proc(void) { #endif void -pages_set_thp_state (void *ptr, size_t size) { +pages_set_thp_state(void *ptr, size_t size) { if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { return; } - assert(opt_thp != thp_mode_not_supported && - init_system_thp_mode != thp_mode_not_supported); + assert(opt_thp != thp_mode_not_supported + && init_system_thp_mode != thp_mode_not_supported); if (opt_thp == thp_mode_always && init_system_thp_mode != thp_mode_never) { assert(init_system_thp_mode == thp_mode_default); pages_huge_unaligned(ptr, size); } else if (opt_thp == thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default || - init_system_thp_mode == thp_mode_always); + assert(init_system_thp_mode == thp_mode_default + || init_system_thp_mode == thp_mode_always); pages_nohuge_unaligned(ptr, size); } } @@ -794,7 +797,7 @@ init_thp_state(void) { static const char sys_state_madvise[] = "always [madvise] never\n"; static const char sys_state_always[] = "[always] madvise never\n"; static const char sys_state_never[] = "always madvise [never]\n"; - char buf[sizeof(sys_state_madvise)]; + char buf[sizeof(sys_state_madvise)]; int fd = malloc_open( "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); @@ -839,10 +842,13 @@ pages_boot(void) { #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS if (!opt_trust_madvise) { - madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); + madvise_dont_need_zeros_is_faulty = + !madvise_MADV_DONTNEED_zeroes_pages(); if (madvise_dont_need_zeros_is_faulty) { - malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n"); - malloc_write(": (This is the expected behaviour if you are running under QEMU)\n"); + malloc_write( + ": MADV_DONTNEED does not work (memset will be used instead)\n"); + malloc_write( + ": (This is the expected behaviour if you are running under QEMU)\n"); } } else { /* In case opt_trust_madvise is disable, @@ -859,11 +865,11 @@ 
pages_boot(void) { os_overcommits = os_overcommits_sysctl(); #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) os_overcommits = os_overcommits_proc(); -# ifdef MAP_NORESERVE +# ifdef MAP_NORESERVE if (os_overcommits) { mmap_flags |= MAP_NORESERVE; } -# endif +# endif #elif defined(__NetBSD__) os_overcommits = true; #else @@ -879,8 +885,9 @@ pages_boot(void) { #else /* Detect lazy purge runtime support. */ if (pages_can_purge_lazy) { - bool committed = false; - void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + bool committed = false; + void *madv_free_page = os_pages_map( + NULL, PAGE, PAGE, &committed); if (madv_free_page == NULL) { return true; } diff --git a/src/pai.c b/src/pai.c index e8cddfc3..3114e658 100644 --- a/src/pai.c +++ b/src/pai.c @@ -6,7 +6,7 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { - bool deferred_by_alloc = false; + bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, /* zero */ false, /* guarded */ false, frequent_reuse, &deferred_by_alloc); @@ -20,8 +20,8 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } void -pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated) { +pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { bool deferred_by_dalloc = false; diff --git a/src/peak_event.c b/src/peak_event.c index e7f3ced6..e7f54dba 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -12,7 +12,7 @@ void peak_event_update(tsd_t *tsd) { uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); 
peak_update(peak, alloc, dalloc); } @@ -32,7 +32,7 @@ void peak_event_zero(tsd_t *tsd) { uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); peak_set_zero(peak, alloc, dalloc); } @@ -65,8 +65,8 @@ peak_event_enabled(void) { /* Handles alloc and dalloc */ te_base_cb_t peak_te_handler = { - .enabled = &peak_event_enabled, - .new_event_wait = &peak_event_new_event_wait, - .postponed_event_wait = &peak_event_postponed_event_wait, - .event_handler = &peak_event_handler, + .enabled = &peak_event_enabled, + .new_event_wait = &peak_event_new_event_wait, + .postponed_event_wait = &peak_event_postponed_event_wait, + .event_handler = &peak_event_handler, }; diff --git a/src/prof.c b/src/prof.c index ec13afbd..a833fed5 100644 --- a/src/prof.c +++ b/src/prof.c @@ -24,21 +24,21 @@ /* Data. */ -bool opt_prof = false; -bool opt_prof_active = true; -bool opt_prof_thread_active_init = true; +bool opt_prof = false; +bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; unsigned opt_prof_bt_max = PROF_BT_MAX_DEFAULT; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_final = false; -bool opt_prof_leak = false; -bool opt_prof_leak_error = false; -bool opt_prof_accum = false; -bool opt_prof_pid_namespace = false; -char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; -bool opt_prof_sys_thread_name = false; -bool opt_prof_unbias = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_final = false; +bool opt_prof_leak = false; +bool opt_prof_leak_error = false; +bool opt_prof_accum = false; +bool opt_prof_pid_namespace = false; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof_sys_thread_name = false; +bool opt_prof_unbias = true; /* 
Accessed via prof_sample_event_handler(). */ static counter_accum_t prof_idump_accumulated; @@ -47,28 +47,28 @@ static counter_accum_t prof_idump_accumulated; * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). */ -bool prof_active_state; +bool prof_active_state; static malloc_mutex_t prof_active_mtx; /* * Initialized as opt_prof_thread_active_init, and accessed via * prof_thread_active_init_[gs]et(). */ -static bool prof_thread_active_init; +static bool prof_thread_active_init; static malloc_mutex_t prof_thread_active_init_mtx; /* * Initialized as opt_prof_gdump, and accessed via * prof_gdump_[gs]et{_unlocked,}(). */ -bool prof_gdump_val; +bool prof_gdump_val; static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; -static uint64_t next_thr_uid; +static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; /* Do not dump any profiles until bootstrapping is complete. */ @@ -113,16 +113,16 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, - size_t usize, prof_tctx_t *tctx) { +prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); if (opt_prof_sys_thread_name) { prof_sys_thread_name_fetch(tsd); } - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); prof_info_set(tsd, edata, tctx, size); szind_t szind = sz_size2index(usize); @@ -173,8 +173,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_info_t *prof_info) { +prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info) { cassert(config_prof); assert(prof_info != NULL); @@ -279,10 +279,12 @@ prof_sample_new_event_wait(tsd_t *tsd) { * otherwise 
bytes_until_sample would be 0 if u is exactly 1.0. */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (r == 0U) ? 1.0 : (double)((long double)r * - (1.0L/9007199254740992.0L)); - return (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + double u = (r == 0U) + ? 1.0 + : (double)((long double)r * (1.0L / 9007199254740992.0L)); + return (uint64_t)(log(u) + / log( + 1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; #else not_reached(); @@ -322,9 +324,9 @@ prof_sample_enabled(void) { } te_base_cb_t prof_sample_te_handler = { - .enabled = &prof_sample_enabled, - .new_event_wait = &prof_sample_new_event_wait, - /* + .enabled = &prof_sample_enabled, + .new_event_wait = &prof_sample_new_event_wait, + /* * The postponed wait time for prof sample event is computed as if we * want a new wait time (i.e. as if the event were triggered). If we * instead postpone to the immediate next allocation, like how we're @@ -332,8 +334,8 @@ te_base_cb_t prof_sample_te_handler = { * the allocation immediately following a reentrancy always comes from * the same stack trace. */ - .postponed_event_wait = &prof_sample_new_event_wait, - .event_handler = &prof_sample_event_handler, + .postponed_event_wait = &prof_sample_new_event_wait, + .event_handler = &prof_sample_event_handler, }; static void @@ -361,7 +363,7 @@ prof_idump_accum_init(void) { void prof_idump(tsdn_t *tsdn) { - tsd_t *tsd; + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); @@ -400,7 +402,7 @@ prof_mdump(tsd_t *tsd, const char *filename) { void prof_gdump(tsdn_t *tsdn) { - tsd_t *tsd; + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); @@ -447,7 +449,7 @@ prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; - bool active = tdata->active; + bool active = tdata->active; /* Keep a local copy of the thread name, before detaching. 
*/ prof_thread_name_assert(tdata); @@ -455,8 +457,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { strncpy(thread_name, tdata->thread_name, PROF_THREAD_NAME_MAX_LEN); prof_tdata_detach(tsd, tdata); - return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active); + return prof_tdata_init_impl( + tsd, thr_uid, thr_discrim, thread_name, active); } void @@ -595,8 +597,8 @@ prof_backtrace_hook_set(prof_backtrace_hook_t hook) { prof_backtrace_hook_t prof_backtrace_hook_get(void) { - return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, - ATOMIC_ACQUIRE); + return (prof_backtrace_hook_t)atomic_load_p( + &prof_backtrace_hook, ATOMIC_ACQUIRE); } void @@ -606,8 +608,7 @@ prof_dump_hook_set(prof_dump_hook_t hook) { prof_dump_hook_t prof_dump_hook_get(void) { - return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, - ATOMIC_ACQUIRE); + return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, ATOMIC_ACQUIRE); } void @@ -617,8 +618,8 @@ prof_sample_hook_set(prof_sample_hook_t hook) { prof_sample_hook_t prof_sample_hook_get(void) { - return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, - ATOMIC_ACQUIRE); + return (prof_sample_hook_t)atomic_load_p( + &prof_sample_hook, ATOMIC_ACQUIRE); } void @@ -628,16 +629,16 @@ prof_sample_free_hook_set(prof_sample_free_hook_t hook) { prof_sample_free_hook_t prof_sample_free_hook_get(void) { - return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, - ATOMIC_ACQUIRE); + return (prof_sample_free_hook_t)atomic_load_p( + &prof_sample_free_hook, ATOMIC_ACQUIRE); } void prof_boot0(void) { cassert(config_prof); - memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, - sizeof(PROF_PREFIX_DEFAULT)); + memcpy( + opt_prof_prefix, PROF_PREFIX_DEFAULT, sizeof(PROF_PREFIX_DEFAULT)); } void @@ -661,8 +662,8 @@ prof_boot1(void) { opt_prof_gdump = false; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { - prof_interval = (((uint64_t)1U) << - opt_lg_prof_interval); + prof_interval = (((uint64_t)1U) + << 
opt_lg_prof_interval); } } } @@ -676,41 +677,40 @@ prof_boot2(tsd_t *tsd, base_t *base) { * stats when opt_prof is false. */ if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_thread_active_init_mtx, - "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, - malloc_mutex_rank_exclusive)) { + "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, + malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", - WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, - "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, - malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", + WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { 
return true; } @@ -730,8 +730,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } - if (opt_prof_final && opt_prof_prefix[0] != '\0' && - atexit(prof_fdump) != 0) { + if (opt_prof_final && opt_prof_prefix[0] != '\0' + && atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) { abort(); @@ -755,8 +755,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } for (unsigned i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", - WITNESS_RANK_PROF_GCTX, - malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_GCTX, + malloc_mutex_rank_exclusive)) { return true; } } @@ -768,8 +768,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } for (unsigned i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", - WITNESS_RANK_PROF_TDATA, - malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_TDATA, + malloc_mutex_rank_exclusive)) { return true; } } @@ -820,8 +820,8 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, - &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent( + tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx); malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); diff --git a/src/prof_data.c b/src/prof_data.c index edc5c558..7aa047ac 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -36,7 +36,7 @@ malloc_mutex_t prof_dump_mtx; * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -malloc_mutex_t *gctx_locks; +malloc_mutex_t *gctx_locks; static atomic_u_t cum_gctxs; /* Atomic counter. 
*/ /* @@ -69,33 +69,32 @@ static int prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { uint64_t a_thr_uid = a->thr_uid; uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); if (ret == 0) { uint64_t a_thr_discrim = a->thr_discrim; uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); + ret = (a_thr_discrim > b_thr_discrim) + - (a_thr_discrim < b_thr_discrim); if (ret == 0) { uint64_t a_tctx_uid = a->tctx_uid; uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); + ret = (a_tctx_uid > b_tctx_uid) + - (a_tctx_uid < b_tctx_uid); } } return ret; } /* NOLINTBEGIN(performance-no-int-to-ptr) */ -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, + prof_tctx_comp) + /* NOLINTEND(performance-no-int-to-ptr) */ -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { unsigned a_len = a->bt.len; unsigned b_len = b->bt.len; unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); if (ret == 0) { ret = (a_len > b_len) - (a_len < b_len); } @@ -105,11 +104,10 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { /* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ + /* NOLINTEND(performance-no-int-to-ptr) */ -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; + static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; uint64_t a_uid = a->thr_uid; uint64_t b_uid = b->thr_uid; @@ -126,12 +124,11 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { /* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, prof_tdata_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ + /* NOLINTEND(performance-no-int-to-ptr) */ -/******************************************************************************/ + /******************************************************************************/ -static malloc_mutex_t * -prof_gctx_mutex_choose(void) { + static malloc_mutex_t *prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; @@ -145,8 +142,8 @@ prof_tdata_mutex_choose(uint64_t thr_uid) { bool prof_data_init(tsd_t *tsd) { tdata_tree_new(&tdatas); - return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp); + return ckh_new( + tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp); } static void @@ -195,8 +192,8 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - 
sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); + sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) { return NULL; } @@ -215,8 +212,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { } static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, - prof_gctx_t *gctx) { +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx) { cassert(config_prof); /* @@ -267,12 +263,12 @@ static bool prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { union { - prof_gctx_t *p; - void *v; + prof_gctx_t *p; + void *v; } gctx, tgctx; union { - prof_bt_t *p; - void *v; + prof_bt_t *p; + void *v; } btkey; bool new_gctx; @@ -316,8 +312,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (tgctx.v != NULL) { /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); + idalloctm( + tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, true); } } prof_leave(tsd, tdata); @@ -331,11 +327,11 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { - prof_tctx_t *p; - void *v; + prof_tctx_t *p; + void *v; } ret; prof_tdata_t *tdata; - bool not_found; + bool not_found; cassert(config_prof); @@ -349,16 +345,16 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { } malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { - void *btkey; + void *btkey; prof_gctx_t *gctx; - bool new_gctx, error; + bool new_gctx, error; /* * This thread's cache lacks bt. Look for it in the global * cache. */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { + if (prof_lookup_global( + tsd, bt, tdata, &btkey, &gctx, &new_gctx)) { return NULL; } @@ -403,8 +399,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { /* Used in unit tests. 
*/ static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *arg) { +prof_tdata_count_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { size_t *tdata_count = (size_t *)arg; (*tdata_count)++; @@ -415,13 +411,13 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, /* Used in unit tests. */ size_t prof_tdata_count(void) { - size_t tdata_count = 0; + size_t tdata_count = 0; tsdn_t *tsdn; tsdn = tsdn_fetch(); malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); + tdata_tree_iter( + &tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); malloc_mutex_unlock(tsdn, &tdatas_mtx); return tdata_count; @@ -430,8 +426,8 @@ prof_tdata_count(void) { /* Used in unit tests. */ size_t prof_bt_count(void) { - size_t bt_count; - tsd_t *tsd; + size_t bt_count; + tsd_t *tsd; prof_tdata_t *tdata; tsd = tsd_fetch(); @@ -477,10 +473,10 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { JEMALLOC_FORMAT_PRINTF(3, 4) static void -prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, - const char *format, ...) { +prof_dump_printf( + write_cb_t *prof_dump_write, void *cbopaque, const char *format, ...) 
{ va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); @@ -509,7 +505,8 @@ prof_double_uint64_cast(double d) { } #endif -void prof_unbias_map_init(void) { +void +prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { @@ -621,8 +618,8 @@ prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in, } static void -prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, - const prof_cnt_t *cnts) { +prof_dump_print_cnts( + write_cb_t *prof_dump_write, void *cbopaque, const prof_cnt_t *cnts) { uint64_t curobjs; uint64_t curbytes; uint64_t accumobjs; @@ -639,8 +636,8 @@ prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, accumbytes = cnts->accumbytes; } prof_dump_printf(prof_dump_write, cbopaque, - "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", - curobjs, curbytes, accumobjs, accumbytes); + "%" FMTu64 ": %" FMTu64 " [%" FMTu64 ": %" FMTu64 "]", curobjs, + curbytes, accumobjs, accumbytes); } static void @@ -660,11 +657,11 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curobjs_shifted_unbiased - += tctx->dump_cnts.curobjs_shifted_unbiased; + tdata->cnt_summed.curobjs_shifted_unbiased += + tctx->dump_cnts.curobjs_shifted_unbiased; tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - tdata->cnt_summed.curbytes_unbiased - += tctx->dump_cnts.curbytes_unbiased; + tdata->cnt_summed.curbytes_unbiased += + tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; @@ -687,17 +684,17 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += 
tctx->dump_cnts.curobjs; - gctx->cnt_summed.curobjs_shifted_unbiased - += tctx->dump_cnts.curobjs_shifted_unbiased; + gctx->cnt_summed.curobjs_shifted_unbiased += + tctx->dump_cnts.curobjs_shifted_unbiased; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumobjs_shifted_unbiased - += tctx->dump_cnts.accumobjs_shifted_unbiased; + gctx->cnt_summed.accumobjs_shifted_unbiased += + tctx->dump_cnts.accumobjs_shifted_unbiased; gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - gctx->cnt_summed.accumbytes_unbiased - += tctx->dump_cnts.accumbytes_unbiased; + gctx->cnt_summed.accumbytes_unbiased += + tctx->dump_cnts.accumbytes_unbiased; } } @@ -725,9 +722,9 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t; struct prof_dump_iter_arg_s { - tsdn_t *tsdn; + tsdn_t *tsdn; write_cb_t *prof_dump_write; - void *cbopaque; + void *cbopaque; }; static prof_tctx_t * @@ -743,9 +740,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { case prof_tctx_state_dumping: case prof_tctx_state_purgatory: prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - " t%"FMTu64": ", tctx->thr_uid); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &tctx->dump_cnts); + " t%" FMTu64 ": ", tctx->thr_uid); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &tctx->dump_cnts); arg->prof_dump_write(arg->cbopaque, "\n"); break; default: @@ -756,7 +753,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); @@ -811,8 +808,8 @@ 
prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); + tctx_tree_iter( + &gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) { (*arg->leak_ngctx)++; } @@ -824,7 +821,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { static void prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; + prof_gctx_t *gctx; /* * Standard tree iteration won't work here, because as soon as we @@ -840,15 +837,14 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { next = NULL; do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, + prof_tctx_t *to_destroy = tctx_tree_iter( + &gctx->tctxs, next, prof_tctx_finish_iter, (void *)tsd_tsdn(tsd)); if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); + next = tctx_tree_next( + &gctx->tctxs, to_destroy); + tctx_tree_remove( + &gctx->tctxs, to_destroy); idalloctm(tsd_tsdn(tsd), to_destroy, NULL, NULL, true, true); } else { @@ -869,41 +865,41 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; + tsdn_t *tsdn; prof_cnt_t *cnt_all; }; static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *opaque) { - prof_tdata_merge_iter_arg_t *arg = - (prof_tdata_merge_iter_arg_t *)opaque; +prof_tdata_merge_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { + prof_tdata_merge_iter_arg_t *arg = (prof_tdata_merge_iter_arg_t *) + opaque; malloc_mutex_lock(arg->tsdn, tdata->lock); 
if (!tdata->expired) { size_t tabind; union { - prof_tctx_t *p; - void *v; + prof_tctx_t *p; + void *v; } tctx; tdata->dumping = true; memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { + for (tabind = 0; + !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) { prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); } arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all->curobjs_shifted_unbiased - += tdata->cnt_summed.curobjs_shifted_unbiased; + arg->cnt_all->curobjs_shifted_unbiased += + tdata->cnt_summed.curobjs_shifted_unbiased; arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; - arg->cnt_all->curbytes_unbiased - += tdata->cnt_summed.curbytes_unbiased; + arg->cnt_all->curbytes_unbiased += + tdata->cnt_summed.curbytes_unbiased; if (opt_prof_accum) { arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all->accumobjs_shifted_unbiased - += tdata->cnt_summed.accumobjs_shifted_unbiased; + arg->cnt_all->accumobjs_shifted_unbiased += + tdata->cnt_summed.accumobjs_shifted_unbiased; arg->cnt_all->accumbytes += tdata->cnt_summed.accumbytes; arg->cnt_all->accumbytes_unbiased += @@ -918,17 +914,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, } static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *opaque) { +prof_tdata_dump_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { if (!tdata->dumping) { return NULL; } prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; - prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", - tdata->thr_uid); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &tdata->cnt_summed); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " t%" FMTu64 ": ", tdata->thr_uid); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &tdata->cnt_summed); if (!prof_thread_name_empty(tdata)) { 
arg->prof_dump_write(arg->cbopaque, " "); arg->prof_dump_write(arg->cbopaque, tdata->thread_name); @@ -940,7 +936,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, static void prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); + "heap_v2/%" FMTu64 "\n t*: ", ((uint64_t)1U << lg_prof_sample)); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); arg->prof_dump_write(arg->cbopaque, "\n"); @@ -956,8 +952,8 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, malloc_mutex_assert_owner(arg->tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) + || (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { assert(gctx->cnt_summed.curobjs == 0); assert(gctx->cnt_summed.curbytes == 0); /* @@ -976,12 +972,12 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, arg->prof_dump_write(arg->cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - " %#"FMTxPTR, (uintptr_t)bt->vec[i]); + " %#" FMTxPTR, (uintptr_t)bt->vec[i]); } arg->prof_dump_write(arg->cbopaque, "\n t*: "); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &gctx->cnt_summed); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &gctx->cnt_summed); arg->prof_dump_write(arg->cbopaque, "\n"); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg); @@ -1002,18 +998,21 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { */ if (cnt_all->curbytes != 0) { double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 
1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); + double ratio = (((double)cnt_all->curbytes) + / (double)cnt_all->curobjs) + / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round( + ((double)cnt_all->curbytes) * scale_factor); + uint64_t curobjs = (uint64_t)round( + ((double)cnt_all->curobjs) * scale_factor); - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + malloc_printf( + ": Leak approximation summary: ~%" FMTu64 + " byte%s, ~%" FMTu64 " object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, + (curobjs != 1) ? "s" : "", leak_ngctx, + (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on dump output for leak detail\n"); if (opt_prof_leak_error) { @@ -1044,8 +1043,8 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, size_t *leak_ngctx, prof_gctx_tree_t *gctxs) { size_t tabind; union { - prof_gctx_t *p; - void *v; + prof_gctx_t *p; + void *v; } gctx; prof_enter(tsd, tdata); @@ -1064,19 +1063,19 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, * stats and merge them into the associated gctx's. */ memset(cnt_all, 0, sizeof(prof_cnt_t)); - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd), - cnt_all}; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = { + tsd_tsdn(tsd), cnt_all}; malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - &prof_tdata_merge_iter_arg); + tdata_tree_iter( + &tdatas, NULL, prof_tdata_merge_iter, &prof_tdata_merge_iter_arg); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. 
*/ *leak_ngctx = 0; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd), - leak_ngctx}; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - &prof_gctx_merge_iter_arg); + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = { + tsd_tsdn(tsd), leak_ngctx}; + gctx_tree_iter( + gctxs, NULL, prof_gctx_merge_iter, &prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); } @@ -1085,12 +1084,12 @@ void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); - prof_cnt_t cnt_all; - size_t leak_ngctx; + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); - prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), - prof_dump_write, cbopaque}; + prof_dump_iter_arg_t prof_dump_iter_arg = { + tsd_tsdn(tsd), prof_dump_write, cbopaque}; prof_dump_header(&prof_dump_iter_arg, &cnt_all); gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); @@ -1102,12 +1101,12 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, /* Used in unit tests. */ void prof_cnt_all(prof_cnt_t *cnt_all) { - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); prof_tdata_t *tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { memset(cnt_all, 0, sizeof(prof_cnt_t)); } else { - size_t leak_ngctx; + size_t leak_ngctx; prof_gctx_tree_t gctxs; prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs); prof_gctx_finish(tsd, &gctxs); @@ -1148,8 +1147,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. 
*/ size_t tdata_sz = ALIGNMENT_CEILING(sizeof(prof_tdata_t), QUANTUM); size_t total_sz = tdata_sz + sizeof(void *) * opt_prof_bt_max; - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), - total_sz, sz_size2index(total_sz), false, NULL, true, + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), total_sz, + sz_size2index(total_sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { return NULL; @@ -1170,7 +1169,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, prof_thread_name_assert(tdata); if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { + prof_bt_keycomp)) { idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); return NULL; } @@ -1201,16 +1200,16 @@ prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { } static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { +prof_tdata_should_destroy( + tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsdn, tdata->lock); return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); } static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { +prof_tdata_destroy_locked( + tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); @@ -1234,8 +1233,8 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); + destroy_tdata = prof_tdata_should_destroy( + tsd_tsdn(tsd), tdata, true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. 
@@ -1270,8 +1269,8 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { } static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *arg) { +prof_tdata_reset_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); @@ -1291,8 +1290,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) { next = NULL; do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); + prof_tdata_t *to_destroy = tdata_tree_iter( + &tdatas, next, prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1355,8 +1354,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; tctx->tdata = NULL; ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), - tdata, false); + bool destroy_tdata = prof_tdata_should_destroy( + tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) { prof_tdata_destroy(tsd, tdata, false); diff --git a/src/prof_log.c b/src/prof_log.c index f4000aec..64b363bb 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -12,7 +12,7 @@ #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_sys.h" -bool opt_prof_log = false; +bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; enum prof_logging_state_e { prof_logging_state_stopped, @@ -32,8 +32,8 @@ static bool prof_log_dummy = false; /* Incremented for every log file that is output. */ static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. */ +static char log_filename[ +/* Minimize memory bloat for non-prof builds. 
*/ #ifdef JEMALLOC_PROF PATH_MAX + #endif @@ -51,8 +51,8 @@ typedef struct prof_bt_node_s prof_bt_node_t; struct prof_bt_node_s { prof_bt_node_t *next; - size_t index; - prof_bt_t bt; + size_t index; + prof_bt_t bt; /* Variable size backtrace vector pointed to by bt. */ void *vec[1]; }; @@ -61,8 +61,8 @@ typedef struct prof_thr_node_s prof_thr_node_t; struct prof_thr_node_s { prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; + size_t index; + uint64_t thr_uid; /* Variable size based on thr_name_sz. */ char name[1]; }; @@ -91,15 +91,15 @@ struct prof_alloc_node_s { * These are the backtraces and threads that have already been logged by an * allocation. */ -static bool log_tables_initialized = false; +static bool log_tables_initialized = false; static ckh_t log_bt_node_set; static ckh_t log_thr_node_set; /* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; static prof_alloc_node_t *log_alloc_first = NULL; static prof_alloc_node_t *log_alloc_last = NULL; @@ -131,12 +131,12 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { /* See if this backtrace is already cached in the table. 
*/ if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (log_bt_first == NULL) { log_bt_first = new_node; log_bt_last = new_node; @@ -174,11 +174,11 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { /* See if this thread is already cached in the table. */ if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { + (void **)(&node), NULL)) { size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); + prof_thr_node_t *new_node = (prof_thr_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (log_thr_first == NULL) { log_thr_first = new_node; log_thr_last = new_node; @@ -225,9 +225,9 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { if (!log_tables_initialized) { bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); + prof_bt_node_hash, prof_bt_node_keycomp); bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); + prof_thr_node_hash, prof_thr_node_keycomp); if (err1 || err2) { goto label_done; } @@ -238,9 +238,9 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { nstime_t free_time; nstime_prof_init_update(&free_time); - size_t sz 
= sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); const char *prod_thr_name = tctx->tdata->thread_name; @@ -256,10 +256,10 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_bt_t *prod_bt = &tctx->gctx->bt; new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); + new_node->alloc_thr_ind = prof_log_thr_index( + tsd, tctx->tdata->thr_uid, prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index( + tsd, cons_tdata->thr_uid, cons_thr_name); new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); new_node->alloc_time_ns = nstime_ns(&alloc_time); @@ -288,8 +288,8 @@ static bool prof_bt_node_keycomp(const void *k1, const void *k2) { const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void *)(&bt_node2->bt)); + return prof_bt_keycomp( + (void *)(&bt_node1->bt), (void *)(&bt_node2->bt)); } static void @@ -309,7 +309,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { size_t prof_log_bt_count(void) { cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_bt_node_t *node = log_bt_first; while (node != NULL) { cnt++; @@ -322,7 +322,7 @@ prof_log_bt_count(void) { size_t prof_log_alloc_count(void) { cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_alloc_node_t *node = log_alloc_first; while (node != NULL) { cnt++; @@ -335,7 +335,7 @@ prof_log_alloc_count(void) { size_t prof_log_thr_count(void) { 
cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_thr_node_t *node = log_thr_first; while (node != NULL) { cnt++; @@ -374,7 +374,6 @@ prof_log_rep_check(void) { size_t thr_count = prof_log_thr_count(); size_t alloc_count = prof_log_alloc_count(); - if (prof_logging_state == prof_logging_state_stopped) { if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { return true; @@ -435,7 +434,8 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { if (!prof_log_atexit_called) { prof_log_atexit_called = true; if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " + malloc_write( + ": Error in atexit() " "for logging\n"); if (opt_abort) { abort(); @@ -469,14 +469,14 @@ label_done: } struct prof_emitter_cb_arg_s { - int fd; + int fd; ssize_t ret; }; static void prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; + struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *) + opaque; size_t bytes = strlen(to_write); if (prof_log_dummy) { return; @@ -501,8 +501,8 @@ prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { char *thr_name = thr_node->name; - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); + emitter_json_kv( + emitter, "thr_name", emitter_type_string, &thr_name); emitter_json_object_end(emitter); thr_old_node = thr_node; @@ -521,7 +521,7 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { * Calculate how many hex digits we need: twice number of bytes, two for * "0x", and then one more for terminating '\0'. 
*/ - char buf[2 * sizeof(intptr_t) + 3]; + char buf[2 * sizeof(intptr_t) + 3]; size_t buf_sz = sizeof(buf); while (bt_node != NULL) { emitter_json_array_begin(emitter); @@ -529,8 +529,8 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { for (i = 0; i < bt_node->bt.len; i++) { malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); + emitter_json_value( + emitter, emitter_type_string, &trace_str); } emitter_json_array_end(emitter); @@ -561,21 +561,21 @@ prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { emitter_json_kv(emitter, "free_trace", emitter_type_size, &alloc_node->free_bt_ind); - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); + emitter_json_kv(emitter, "alloc_timestamp", emitter_type_uint64, + &alloc_node->alloc_time_ns); emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, &alloc_node->free_time_ns); - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); + emitter_json_kv( + emitter, "usize", emitter_type_uint64, &alloc_node->usize); emitter_json_object_end(emitter); alloc_old_node = alloc_node; alloc_node = alloc_node->next; - idalloctm(tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, - true); + idalloctm( + tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -591,15 +591,14 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); + emitter_json_kv(emitter, "version", emitter_type_string, &vers); - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); + emitter_json_kv( + emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); const char *res_type = prof_time_res_mode_names[opt_prof_time_res]; - emitter_json_kv(emitter, "prof_time_resolution", 
emitter_type_string, - &res_type); + emitter_json_kv( + emitter, "prof_time_resolution", emitter_type_string, &res_type); int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); @@ -632,7 +631,6 @@ prof_log_stop(tsdn_t *tsdn) { prof_logging_state = prof_logging_state_dumping; malloc_mutex_unlock(tsdn, &log_mtx); - emitter_t emitter; /* Create a file. */ @@ -645,8 +643,10 @@ prof_log_stop(tsdn_t *tsdn) { } if (fd == -1) { - malloc_printf(": creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); + malloc_printf( + ": creat() for log file \"%s\" " + " failed with %d\n", + log_filename, errno); if (opt_abort) { abort(); } @@ -659,8 +659,8 @@ prof_log_stop(tsdn_t *tsdn) { buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, - &buf_writer); + emitter_init( + &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -701,8 +701,8 @@ JEMALLOC_COLD bool prof_log_init(tsd_t *tsd) { cassert(config_prof); - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, + malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/prof_recent.c b/src/prof_recent.c index b5639b4c..f7108bee 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -7,18 +7,18 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" -ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; -malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ +ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; +malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; -static ssize_t prof_recent_alloc_count = 0; +static ssize_t 
prof_recent_alloc_count = 0; prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */ static void prof_recent_alloc_max_init(void) { - atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, - ATOMIC_RELAXED); + atomic_store_zd( + &prof_recent_alloc_max, opt_prof_recent_alloc_max, ATOMIC_RELAXED); } static inline ssize_t @@ -144,26 +144,26 @@ edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *recent_alloc = - edata_prof_recent_alloc_get_no_lock(edata); - assert(recent_alloc == NULL || - prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); + prof_recent_t *recent_alloc = edata_prof_recent_alloc_get_no_lock( + edata); + assert(recent_alloc == NULL + || prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); return recent_alloc; } static prof_recent_t * -edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_update_internal( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *old_recent_alloc = - edata_prof_recent_alloc_get(tsd, edata); + prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_get( + tsd, edata); edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc); return old_recent_alloc; } static void -edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_set( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = @@ -173,8 +173,8 @@ edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, } static void -edata_prof_recent_alloc_reset(tsd_t *tsd, 
edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_reset( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = @@ -265,14 +265,14 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { if (!config_debug) { return; } - ssize_t count = 0; + ssize_t count = 0; prof_recent_t *n; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++count; } assert(count == prof_recent_alloc_count); - assert(prof_recent_alloc_max_get(tsd) == -1 || - count <= prof_recent_alloc_max_get(tsd)); + assert(prof_recent_alloc_max_get(tsd) == -1 + || count <= prof_recent_alloc_max_get(tsd)); } void @@ -319,8 +319,8 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { * the allocation locks. */ prof_recent_t *reserve = NULL; - if (prof_recent_alloc_max_get(tsd) == -1 || - prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { + if (prof_recent_alloc_max_get(tsd) == -1 + || prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { assert(prof_recent_alloc_max_get(tsd) != 0); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); reserve = prof_recent_allocate_node(tsd_tsdn(tsd)); @@ -346,8 +346,9 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { ql_rotate(&prof_recent_alloc_list, link); } else { /* Otherwise make use of the new node. 
*/ - assert(prof_recent_alloc_max_get(tsd) == -1 || - prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); + assert(prof_recent_alloc_max_get(tsd) == -1 + || prof_recent_alloc_count + < prof_recent_alloc_max_get(tsd)); if (reserve == NULL) { goto label_rollback; } @@ -421,7 +422,7 @@ prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { } prof_recent_t *node; - ql_foreach(node, &prof_recent_alloc_list, link) { + ql_foreach (node, &prof_recent_alloc_list, link) { if (prof_recent_alloc_count == max) { break; } @@ -462,7 +463,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { assert(max >= -1); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); - const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); prof_recent_list_t to_delete; prof_recent_alloc_restore_locked(tsd, &to_delete); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -472,7 +473,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { static void prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { - char bt_buf[2 * sizeof(intptr_t) + 3]; + char bt_buf[2 * sizeof(intptr_t) + 3]; char *s = bt_buf; assert(tctx != NULL); prof_bt_t *bt = &tctx->gctx->bt; @@ -501,8 +502,8 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_type_string, &thread_name); } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); - emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, - &alloc_time_ns); + emitter_json_kv( + emitter, "alloc_time", emitter_type_uint64, &alloc_time_ns); emitter_json_array_kv_begin(emitter, "alloc_trace"); prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); emitter_json_array_end(emitter); @@ -539,8 +540,8 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, 
PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, - &buf_writer); + emitter_init( + &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); prof_recent_list_t temp_list; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -554,13 +555,13 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_begin(&emitter); uint64_t sample_interval = (uint64_t)1U << lg_prof_sample; - emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64, - &sample_interval); - emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, - &dump_max); + emitter_json_kv( + &emitter, "sample_interval", emitter_type_uint64, &sample_interval); + emitter_json_kv( + &emitter, "recent_alloc_max", emitter_type_ssize, &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); prof_recent_t *node; - ql_foreach(node, &temp_list, link) { + ql_foreach (node, &temp_list, link) { prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); @@ -587,12 +588,12 @@ prof_recent_init(void) { prof_recent_alloc_max_init(); if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", - WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump", - WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index f5e5c044..b167b132 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -6,12 +6,12 @@ #if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID) -# include -# include -# include -# include // strtoul -# include -# include +# include +# include +# include +# include // strtoul +# include +# include /* * Converts a string representing a hexadecimal 
number to an unsigned long long @@ -25,31 +25,31 @@ */ static inline unsigned long long int strtoull_hex(const char *nptr, char **endptr) { - unsigned long long int val = 0; - int ii = 0; - for (; ii < 16; ++ii) { - char c = nptr[ii]; - if (c >= '0' && c <= '9') { - val = (val << 4) + (c - '0'); - } else if (c >= 'a' && c <= 'f') { - val = (val << 4) + (c - 'a' + 10); - } else { - break; - } - } - if (endptr) { - *endptr = (char *)(nptr + ii); - } - return val; + unsigned long long int val = 0; + int ii = 0; + for (; ii < 16; ++ii) { + char c = nptr[ii]; + if (c >= '0' && c <= '9') { + val = (val << 4) + (c - '0'); + } else if (c >= 'a' && c <= 'f') { + val = (val << 4) + (c - 'a' + 10); + } else { + break; + } + } + if (endptr) { + *endptr = (char *)(nptr + ii); + } + return val; } static int prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, - uintptr_t *mm_start, uintptr_t *mm_end) { - int ret = ENOENT; /* not found */ - *mm_start = *mm_end = 0; + uintptr_t *mm_start, uintptr_t *mm_end) { + int ret = ENOENT; /* not found */ + *mm_start = *mm_end = 0; - /* + /* * Each line of /proc//maps is: * - * @@ -57,90 +57,93 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, * as long as `buf` contains the start of a mapping line it can always be * parsed. 
*/ - static const int kMappingFieldsWidth = 34; + static const int kMappingFieldsWidth = 34; - int fd = -1; - char buf[4096]; - ssize_t remaining = 0; /* actual number of bytes read to buf */ - char *line = NULL; + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; /* actual number of bytes read to buf */ + char *line = NULL; - while (1) { - if (fd < 0) { - /* case 0: initial open of maps file */ - fd = malloc_open(maps_path, O_RDONLY); - if (fd < 0) { - return errno; - } + while (1) { + if (fd < 0) { + /* case 0: initial open of maps file */ + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - ret = errno; - break; - } - line = buf; - } else if (line == NULL) { - /* case 1: no newline found in buf */ - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - ret = errno; - break; - } - line = memchr(buf, '\n', remaining); - if (line != NULL) { - line++; /* advance to character after newline */ - remaining -= (line - buf); - } - } else if (line != NULL && remaining < kMappingFieldsWidth) { - /* + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = buf; + } else if (line == NULL) { + /* case 1: no newline found in buf */ + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + /* * case 2: found newline but insufficient characters remaining in * buf */ - memcpy(buf, line, - remaining); /* copy remaining characters to start of buf */ - line = buf; + memcpy(buf, line, + remaining); /* copy remaining characters to start of buf */ + line = buf; - size_t count = - malloc_read_fd(fd, buf + remaining, sizeof(buf) - remaining); - if (count 
<= 0) { - ret = errno; - break; - } + size_t count = malloc_read_fd( + fd, buf + remaining, sizeof(buf) - remaining); + if (count <= 0) { + ret = errno; + break; + } - remaining += count; /* actual number of bytes read to buf */ - } else { - /* case 3: found newline and sufficient characters to parse */ + remaining += + count; /* actual number of bytes read to buf */ + } else { + /* case 3: found newline and sufficient characters to parse */ - /* parse - */ - char *tmp = line; - uintptr_t start_addr = (uintptr_t)strtoull_hex(tmp, &tmp); - if (addr >= start_addr) { - tmp++; /* advance to character after '-' */ - uintptr_t end_addr = (uintptr_t)strtoull_hex(tmp, NULL); - if (addr < end_addr) { - *mm_start = start_addr; - *mm_end = end_addr; - ret = 0; - break; - } - } + /* parse - */ + char *tmp = line; + uintptr_t start_addr = (uintptr_t)strtoull_hex( + tmp, &tmp); + if (addr >= start_addr) { + tmp++; /* advance to character after '-' */ + uintptr_t end_addr = (uintptr_t)strtoull_hex( + tmp, NULL); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } - /* Advance to character after next newline in the current buf. */ - char *prev_line = line; - line = memchr(line, '\n', remaining); - if (line != NULL) { - line++; /* advance to character after newline */ - remaining -= (line - prev_line); - } - } - } + /* Advance to character after next newline in the current buf. */ + char *prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - prev_line); + } + } + } - malloc_close(fd); - return ret; + malloc_close(fd); + return ret; } int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { - /* + /* * NOTE: Prior to kernel 4.5 an entry for every thread stack was included in * /proc//maps as [STACK:]. Starting with kernel 4.5 only the main * thread stack remains as the [stack] mapping. 
For other thread stacks the @@ -148,19 +151,19 @@ prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { * labeled as [STACK:tid]). * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html */ - char maps_path[64]; // "/proc//task//maps" - malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", - getpid(), gettid()); - return prof_mapping_containing_addr(fp, maps_path, low, high); + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", + getpid(), gettid()); + return prof_mapping_containing_addr(fp, maps_path, low, high); } #else int prof_thread_stack_range( - UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { - *stack_start = *stack_end = 0; - return ENOENT; + UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { + *stack_start = *stack_end = 0; + return ENOENT; } -#endif // __linux__ +#endif // __linux__ diff --git a/src/prof_stats.c b/src/prof_stats.c index 5d1a506b..db248be7 100644 --- a/src/prof_stats.c +++ b/src/prof_stats.c @@ -3,8 +3,8 @@ #include "jemalloc/internal/prof_stats.h" -bool opt_prof_stats = false; -malloc_mutex_t prof_stats_mtx; +bool opt_prof_stats = false; +malloc_mutex_t prof_stats_mtx; static prof_stats_t prof_stats_live[PROF_SC_NSIZES]; static prof_stats_t prof_stats_accum[PROF_SC_NSIZES]; diff --git a/src/prof_sys.c b/src/prof_sys.c index e3b7bbcb..be50c0be 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -8,8 +8,8 @@ #include "jemalloc/internal/prof_sys.h" #ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY -#include +# define UNW_LOCAL_ONLY +# include #endif #ifdef JEMALLOC_PROF_LIBGCC @@ -18,14 +18,15 @@ * use libgcc's unwinding functionality, but after we've included that, we've * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 
*/ -#undef _Unwind_Backtrace -#include -#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# undef _Unwind_Backtrace +# include +# define _Unwind_Backtrace \ + JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif #ifdef JEMALLOC_PROF_FRAME_POINTER // execinfo backtrace() as fallback unwinder -#include +# include #endif /******************************************************************************/ @@ -77,7 +78,7 @@ prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - void *ip; + void *ip; cassert(config_prof); @@ -115,14 +116,15 @@ struct stack_range { struct thread_unwind_info { struct stack_range stack_range; - bool fallback; + bool fallback; }; static __thread struct thread_unwind_info unwind_info = { - .stack_range = { - .start = 0, - .end = 0, - }, - .fallback = false, + .stack_range = + { + .start = 0, + .end = 0, + }, + .fallback = false, }; /* thread local */ static void @@ -142,10 +144,11 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { uintptr_t fp = (uintptr_t)__builtin_frame_address(0); /* new thread - get the stack range */ - if (!unwind_info.fallback && - unwind_info.stack_range.start == unwind_info.stack_range.end) { + if (!unwind_info.fallback + && unwind_info.stack_range.start == unwind_info.stack_range.end) { if (prof_thread_stack_range(fp, &unwind_info.stack_range.start, - &unwind_info.stack_range.end) != 0) { + &unwind_info.stack_range.end) + != 0) { unwind_info.fallback = true; } else { assert(fp >= unwind_info.stack_range.start @@ -159,8 +162,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { unsigned ii = 0; while (ii < max_len && fp != 0) { - if (fp < unwind_info.stack_range.start || - fp >= unwind_info.stack_range.end) { + if (fp < unwind_info.stack_range.start + || fp >= 
unwind_info.stack_range.end) { /* * Determining the stack range from procfs can be * relatively expensive especially for programs with @@ -173,7 +176,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { unwind_info.fallback = true; goto label_fallback; } - void* ip = ((void **)fp)[1]; + void *ip = ((void **)fp)[1]; if (ip == 0) { break; } @@ -205,21 +208,21 @@ JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { /* The input arg must be a constant for __builtin_return_address. */ -#define BT_FRAME(i) \ - if ((i) < max_len) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) { \ - return; \ - } \ - p = __builtin_return_address(i); \ - if (p == NULL) { \ - return; \ - } \ - vec[(i)] = p; \ - *len = (i) + 1; \ - } else { \ - return; \ - } +# define BT_FRAME(i) \ + if ((i) < max_len) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) { \ + return; \ + } \ + p = __builtin_return_address(i); \ + if (p == NULL) { \ + return; \ + } \ + vec[(i)] = p; \ + *len = (i) + 1; \ + } else { \ + return; \ + } cassert(config_prof); assert(vec != NULL); @@ -506,8 +509,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(253) BT_FRAME(254) BT_FRAME(255) -#undef BT_FRAME -JEMALLOC_DIAGNOSTIC_POP +# undef BT_FRAME + JEMALLOC_DIAGNOSTIC_POP } #else static void @@ -568,8 +571,9 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { return; } - if (prof_sys_thread_name_read(tdata->thread_name, - PROF_THREAD_NAME_MAX_LEN) != 0) { + if (prof_sys_thread_name_read( + tdata->thread_name, PROF_THREAD_NAME_MAX_LEN) + != 0) { prof_thread_name_clear(tdata); } @@ -592,32 +596,32 @@ prof_get_pid_namespace(void) { #if defined(_WIN32) || defined(__APPLE__) // Not supported, do nothing. 
#else - char buf[PATH_MAX]; - const char* linkname = -# if defined(__FreeBSD__) || defined(__DragonFly__) + char buf[PATH_MAX]; + const char *linkname = +# if defined(__FreeBSD__) || defined(__DragonFly__) "/proc/curproc/ns/pid" -# else +# else "/proc/self/ns/pid" -# endif +# endif ; ssize_t linklen = -# ifndef JEMALLOC_READLINKAT - readlink(linkname, buf, PATH_MAX) -# else - readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) -# endif +# ifndef JEMALLOC_READLINKAT + readlink(linkname, buf, PATH_MAX) +# else + readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) +# endif ; // namespace string is expected to be like pid:[4026531836] if (linklen > 0) { // Trim the trailing "]" - buf[linklen-1] = '\0'; - char* index = strtok(buf, "pid:["); + buf[linklen - 1] = '\0'; + char *index = strtok(buf, "pid:["); ret = atol(index); } #endif - return ret; + return ret; } /* @@ -647,8 +651,8 @@ struct prof_dump_arg_s { }; static void -prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, - const char *format, ...) { +prof_dump_check_possible_error( + prof_dump_arg_t *arg, bool err_cond, const char *format, ...) 
{ assert(!arg->error); if (!err_cond) { return; @@ -660,7 +664,7 @@ prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, } va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); @@ -692,8 +696,8 @@ prof_dump_flush(void *opaque, const char *s) { cassert(config_prof); prof_dump_arg_t *arg = (prof_dump_arg_t *)opaque; if (!arg->error) { - ssize_t err = prof_dump_write_file(arg->prof_dump_fd, s, - strlen(s)); + ssize_t err = prof_dump_write_file( + arg->prof_dump_fd, s, strlen(s)); prof_dump_check_possible_error(arg, err == -1, ": failed to write during heap profile flush\n"); } @@ -707,36 +711,37 @@ prof_dump_close(prof_dump_arg_t *arg) { } #ifdef __APPLE__ -#include +# include -#ifdef __LP64__ -typedef struct mach_header_64 mach_header_t; +# ifdef __LP64__ +typedef struct mach_header_64 mach_header_t; typedef struct segment_command_64 segment_command_t; -#define MH_MAGIC_VALUE MH_MAGIC_64 -#define MH_CIGAM_VALUE MH_CIGAM_64 -#define LC_SEGMENT_VALUE LC_SEGMENT_64 -#else -typedef struct mach_header mach_header_t; +# define MH_MAGIC_VALUE MH_MAGIC_64 +# define MH_CIGAM_VALUE MH_CIGAM_64 +# define LC_SEGMENT_VALUE LC_SEGMENT_64 +# else +typedef struct mach_header mach_header_t; typedef struct segment_command segment_command_t; -#define MH_MAGIC_VALUE MH_MAGIC -#define MH_CIGAM_VALUE MH_CIGAM -#define LC_SEGMENT_VALUE LC_SEGMENT -#endif +# define MH_MAGIC_VALUE MH_MAGIC +# define MH_CIGAM_VALUE MH_CIGAM +# define LC_SEGMENT_VALUE LC_SEGMENT +# endif static void prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { const mach_header_t *header = (const mach_header_t *) _dyld_get_image_header(image_index); - if (header == NULL || (header->magic != MH_MAGIC_VALUE && - header->magic != MH_CIGAM_VALUE)) { + if (header == NULL + || (header->magic != MH_MAGIC_VALUE + && header->magic != MH_CIGAM_VALUE)) { // Invalid header return; } - 
intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); - const char *name = _dyld_get_image_name(image_index); - struct load_command *load_cmd = (struct load_command *) - ((char *)header + sizeof(mach_header_t)); + intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); + const char *name = _dyld_get_image_name(image_index); + struct load_command *load_cmd = (struct load_command *)((char *)header + + sizeof(mach_header_t)); for (uint32_t i = 0; load_cmd && (i < header->ncmds); i++) { if (load_cmd->cmd == LC_SEGMENT_VALUE) { const segment_command_t *segment_cmd = @@ -744,14 +749,17 @@ prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { if (!strcmp(segment_cmd->segname, "__TEXT")) { char buffer[PATH_MAX + 1]; malloc_snprintf(buffer, sizeof(buffer), - "%016llx-%016llx: %s\n", segment_cmd->vmaddr + slide, - segment_cmd->vmaddr + slide + segment_cmd->vmsize, name); + "%016llx-%016llx: %s\n", + segment_cmd->vmaddr + slide, + segment_cmd->vmaddr + slide + + segment_cmd->vmsize, + name); buf_writer_cb(buf_writer, buffer); return; } } - load_cmd = - (struct load_command *)((char *)load_cmd + load_cmd->cmdsize); + load_cmd = (struct load_command *)((char *)load_cmd + + load_cmd->cmdsize); } } @@ -772,48 +780,48 @@ prof_dump_maps(buf_writer_t *buf_writer) { prof_dump_dyld_maps(buf_writer); } #else /* !__APPLE__ */ -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps_internal(const char *format, ...) 
{ - int mfd; + int mfd; va_list ap; - char filename[PATH_MAX + 1]; + char filename[PATH_MAX + 1]; va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); -#if defined(O_CLOEXEC) +# if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else +# else mfd = open(filename, O_RDONLY); if (mfd != -1) { fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); } -#endif +# endif return mfd; } -#endif +# endif static int prof_dump_open_maps_impl(void) { int mfd; cassert(config_prof); -#if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) mfd = prof_open_maps_internal("/proc/curproc/map"); -#elif defined(_WIN32) +# elif defined(_WIN32) mfd = -1; // Not implemented -#else +# else int pid = prof_getpid(); mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) { mfd = prof_open_maps_internal("/proc/%d/maps", pid); } -#endif +# endif return mfd; } prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = @@ -840,12 +848,12 @@ prof_dump_maps(buf_writer_t *buf_writer) { #endif /* __APPLE__ */ static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { +prof_dump( + tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - prof_tdata_t * tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return true; } @@ -892,7 +900,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } static const char * -prof_prefix_get(tsdn_t* tsdn) { +prof_prefix_get(tsdn_t *tsdn) { malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); return prof_prefix == NULL ? 
opt_prof_prefix : prof_prefix; @@ -919,25 +927,26 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { if (opt_prof_pid_namespace) { /* "....v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%ld.%d.%"FMTu64".%c%"FMTu64".heap", prefix, - prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v, - vseq); + "%s.%ld.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix, + prof_get_pid_namespace(), prof_getpid(), + prof_dump_seq, v, vseq); } else { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), - prof_dump_seq, v, vseq); + "%s.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix, + prof_getpid(), prof_dump_seq, v, vseq); } } else { if (opt_prof_pid_namespace) { /* ".....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%ld.%d.%"FMTu64".%c.heap", prefix, - prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v); + "%s.%ld.%d.%" FMTu64 ".%c.heap", prefix, + prof_get_pid_namespace(), prof_getpid(), + prof_dump_seq, v); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), + "%s.%d.%" FMTu64 ".%c.heap", prefix, prof_getpid(), prof_dump_seq, v); } } @@ -949,11 +958,12 @@ prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); if (opt_prof_pid_namespace) { malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%ld.%d.%"FMTu64".json", prof_prefix_get(tsdn), + "%s.%ld.%d.%" FMTu64 ".json", prof_prefix_get(tsdn), prof_get_pid_namespace(), prof_getpid(), ind); } else { malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + "%s.%d.%" FMTu64 ".json", prof_prefix_get(tsdn), + prof_getpid(), ind); } malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } @@ -980,8 +990,8 @@ prof_prefix_set(tsdn_t *tsdn, const char *prefix) { if (prof_prefix == NULL) { 
malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. */ - char *buffer = base_alloc(tsdn, prof_base, - PROF_DUMP_FILENAME_LEN, QUANTUM); + char *buffer = base_alloc( + tsdn, prof_base, PROF_DUMP_FILENAME_LEN, QUANTUM); if (buffer == NULL) { return true; } @@ -1018,7 +1028,8 @@ prof_mdump_impl(tsd_t *tsd, const char *filename) { /* No filename specified, so automatically generate one. */ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + malloc_mutex_unlock( + tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 0b5cb53c..5b72a491 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -22,8 +22,8 @@ prof_threshold_hook_set(prof_threshold_hook_t hook) { prof_threshold_hook_t prof_threshold_hook_get(void) { - return (prof_threshold_hook_t)atomic_load_p(&prof_threshold_hook, - ATOMIC_ACQUIRE); + return (prof_threshold_hook_t)atomic_load_p( + &prof_threshold_hook, ATOMIC_ACQUIRE); } /* Invoke callback for threshold reached */ @@ -32,10 +32,10 @@ prof_threshold_update(tsd_t *tsd) { prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); if (prof_threshold_hook == NULL) { return; - } + } uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); pre_reentrancy(tsd, NULL); prof_threshold_hook(alloc, dalloc, peak->cur_max); post_reentrancy(tsd); @@ -62,8 +62,8 @@ prof_threshold_enabled(void) { } te_base_cb_t prof_threshold_te_handler = { - .enabled = &prof_threshold_enabled, - .new_event_wait = &prof_threshold_new_event_wait, - .postponed_event_wait = &prof_threshold_postponed_event_wait, - .event_handler = &prof_threshold_event_handler, + .enabled = 
&prof_threshold_enabled, + .new_event_wait = &prof_threshold_new_event_wait, + .postponed_event_wait = &prof_threshold_postponed_event_wait, + .event_handler = &prof_threshold_event_handler, }; diff --git a/src/psset.c b/src/psset.c index afe9f1c1..509df064 100644 --- a/src/psset.c +++ b/src/psset.c @@ -32,16 +32,16 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { psset_bin_stats_accum(&dst->merged, &src->merged); for (int huge = 0; huge < PSSET_NHUGE; huge++) { psset_bin_stats_accum(&dst->slabs[huge], &src->slabs[huge]); - psset_bin_stats_accum(&dst->full_slabs[huge], - &src->full_slabs[huge]); - psset_bin_stats_accum(&dst->empty_slabs[huge], - &src->empty_slabs[huge]); + psset_bin_stats_accum( + &dst->full_slabs[huge], &src->full_slabs[huge]); + psset_bin_stats_accum( + &dst->empty_slabs[huge], &src->empty_slabs[huge]); } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum(&dst->nonfull_slabs[i][0], - &src->nonfull_slabs[i][0]); - psset_bin_stats_accum(&dst->nonfull_slabs[i][1], - &src->nonfull_slabs[i][1]); + psset_bin_stats_accum( + &dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); + psset_bin_stats_accum( + &dst->nonfull_slabs[i][1], &src->nonfull_slabs[i][1]); } } @@ -83,10 +83,10 @@ psset_slab_stats_insert_remove(psset_stats_t *stats, if (config_debug) { psset_bin_stats_t check_stats[PSSET_NHUGE] = {{0}}; for (int huge = 0; huge < PSSET_NHUGE; huge++) { - psset_bin_stats_accum(&check_stats[huge], - &stats->full_slabs[huge]); - psset_bin_stats_accum(&check_stats[huge], - &stats->empty_slabs[huge]); + psset_bin_stats_accum( + &check_stats[huge], &stats->full_slabs[huge]); + psset_bin_stats_accum( + &check_stats[huge], &stats->empty_slabs[huge]); for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { psset_bin_stats_accum(&check_stats[huge], &stats->nonfull_slabs[pind][huge]); @@ -112,14 +112,14 @@ psset_slab_stats_insert_remove(psset_stats_t *stats, } static void -psset_slab_stats_insert(psset_stats_t *stats, 
psset_bin_stats_t *binstats, - hpdata_t *ps) { +psset_slab_stats_insert( + psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { psset_slab_stats_insert_remove(stats, binstats, ps, true); } static void -psset_slab_stats_remove(psset_stats_t *stats, psset_bin_stats_t *binstats, - hpdata_t *ps) { +psset_slab_stats_remove( + psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { psset_slab_stats_insert_remove(stats, binstats, ps, false); } @@ -127,9 +127,9 @@ static pszind_t psset_hpdata_heap_index(const hpdata_t *ps) { assert(!hpdata_full(ps)); assert(!hpdata_empty(ps)); - size_t longest_free_range = hpdata_longest_free_range_get(ps); - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); + size_t longest_free_range = hpdata_longest_free_range_get(ps); + pszind_t pind = sz_psz2ind( + sz_psz_quantize_floor(longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); return pind; } @@ -161,8 +161,8 @@ psset_stats_insert(psset_t *psset, hpdata_t *ps) { psset_slab_stats_insert(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_slab_stats_insert(stats, psset->stats.nonfull_slabs[pind], - ps); + psset_slab_stats_insert( + stats, psset->stats.nonfull_slabs[pind], ps); } } @@ -175,8 +175,8 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { psset_slab_stats_remove(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_slab_stats_remove(stats, psset->stats.nonfull_slabs[pind], - ps); + psset_slab_stats_remove( + stats, psset->stats.nonfull_slabs[pind], ps); } } @@ -264,7 +264,7 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { * purge LRU within a given dirtiness bucket. 
*/ if (hpdata_purge_allowed_get(ps)) { - size_t ind = psset_purge_list_ind(ps); + size_t ind = psset_purge_list_ind(ps); hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; hpdata_purge_list_remove(purge_list, ps); if (hpdata_purge_list_empty(purge_list)) { @@ -276,14 +276,13 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { static void psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { if (hpdata_purge_allowed_get(ps)) { - size_t ind = psset_purge_list_ind(ps); + size_t ind = psset_purge_list_ind(ps); hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; if (hpdata_purge_list_empty(purge_list)) { fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); } hpdata_purge_list_append(purge_list, ps); } - } void @@ -343,13 +342,13 @@ psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { return NULL; } - hpdata_t *ps = NULL; + hpdata_t *ps = NULL; hpdata_age_heap_enumerate_helper_t helper; hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); - while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], - &helper))) { + while ((ps = hpdata_age_heap_enumerate_next( + &psset->pageslabs[pind], &helper))) { if (hpdata_longest_free_range_get(ps) >= size) { return ps; } @@ -363,7 +362,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); - pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); hpdata_t *ps = NULL; /* See comments in eset_first_fit for why we enumerate search below. 
*/ @@ -375,8 +374,8 @@ psset_pick_alloc(psset_t *psset, size_t size) { } } - pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, - (size_t)min_pind); + pszind_t pind = (pszind_t)fb_ffs( + psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } @@ -392,8 +391,8 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, - PSSET_NPURGE_LISTS - 1); + ssize_t ind_ssz = fb_fls( + psset->purge_bitmap, PSSET_NPURGE_LISTS, PSSET_NPURGE_LISTS - 1); if (ind_ssz < 0) { return NULL; } diff --git a/src/rtree.c b/src/rtree.c index b6ac04b7..ac27f829 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -20,7 +20,7 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { rtree->base = base; if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -29,19 +29,19 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { static rtree_node_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc_rtree(tsdn, rtree->base, - nelms * sizeof(rtree_node_elm_t)); + return (rtree_node_elm_t *)base_alloc_rtree( + tsdn, rtree->base, nelms * sizeof(rtree_node_elm_t)); } static rtree_leaf_elm_t * rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc_rtree(tsdn, rtree->base, - nelms * sizeof(rtree_leaf_elm_t)); + return (rtree_leaf_elm_t *)base_alloc_rtree( + tsdn, rtree->base, nelms * sizeof(rtree_leaf_elm_t)); } static rtree_node_elm_t * -rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - atomic_p_t *elmp) { +rtree_node_init( + tsdn_t *tsdn, rtree_t *rtree, unsigned level, atomic_p_t *elmp) { malloc_mutex_lock(tsdn, &rtree->init_lock); /* * If *elmp is non-null, then it was initialized with 
the init lock @@ -49,8 +49,8 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, */ rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED); if (node == NULL) { - node = rtree_node_alloc(tsdn, rtree, ZU(1) << - rtree_levels[level].bits); + node = rtree_node_alloc( + tsdn, rtree, ZU(1) << rtree_levels[level].bits); if (node == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; @@ -75,8 +75,8 @@ rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) { */ rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED); if (leaf == NULL) { - leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) << - rtree_levels[RTREE_HEIGHT-1].bits); + leaf = rtree_leaf_alloc( + tsdn, rtree, ZU(1) << rtree_levels[RTREE_HEIGHT - 1].bits); if (leaf == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; @@ -107,11 +107,11 @@ rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *node; if (dependent) { - node = (rtree_node_elm_t *)atomic_load_p(&elm->child, - ATOMIC_RELAXED); + node = (rtree_node_elm_t *)atomic_load_p( + &elm->child, ATOMIC_RELAXED); } else { - node = (rtree_node_elm_t *)atomic_load_p(&elm->child, - ATOMIC_ACQUIRE); + node = (rtree_node_elm_t *)atomic_load_p( + &elm->child, ATOMIC_ACQUIRE); } assert(!dependent || node != NULL); @@ -136,11 +136,11 @@ rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_leaf_elm_t *leaf; if (dependent) { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, - ATOMIC_RELAXED); + leaf = (rtree_leaf_elm_t *)atomic_load_p( + &elm->child, ATOMIC_RELAXED); } else { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, - ATOMIC_ACQUIRE); + leaf = (rtree_leaf_elm_t *)atomic_load_p( + &elm->child, ATOMIC_ACQUIRE); } assert(!dependent || leaf != NULL); @@ -181,53 +181,54 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } } -#define RTREE_GET_CHILD(level) { \ - assert(level < RTREE_HEIGHT-1); \ - if (level != 0 && 
!dependent && \ - unlikely(!rtree_node_valid(node))) { \ - return NULL; \ - } \ - uintptr_t subkey = rtree_subkey(key, level); \ - if (level + 2 < RTREE_HEIGHT) { \ - node = init_missing ? \ - rtree_child_node_read(tsdn, rtree, \ - &node[subkey], level, dependent) : \ - rtree_child_node_tryread(&node[subkey], \ - dependent); \ - } else { \ - leaf = init_missing ? \ - rtree_child_leaf_read(tsdn, rtree, \ - &node[subkey], level, dependent) : \ - rtree_child_leaf_tryread(&node[subkey], \ - dependent); \ - } \ +#define RTREE_GET_CHILD(level) \ + { \ + assert(level < RTREE_HEIGHT - 1); \ + if (level != 0 && !dependent \ + && unlikely(!rtree_node_valid(node))) { \ + return NULL; \ + } \ + uintptr_t subkey = rtree_subkey(key, level); \ + if (level + 2 < RTREE_HEIGHT) { \ + node = init_missing \ + ? rtree_child_node_read(tsdn, rtree, \ + &node[subkey], level, dependent) \ + : rtree_child_node_tryread( \ + &node[subkey], dependent); \ + } else { \ + leaf = init_missing \ + ? rtree_child_leaf_read(tsdn, rtree, \ + &node[subkey], level, dependent) \ + : rtree_child_leaf_tryread( \ + &node[subkey], dependent); \ + } \ } /* * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss): * (1) evict last entry in L2 cache; (2) move the collision slot from L1 * cache down to L2; and 3) fill L1. 
*/ -#define RTREE_GET_LEAF(level) { \ - assert(level == RTREE_HEIGHT-1); \ - if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ - return NULL; \ - } \ - if (RTREE_CTX_NCACHE_L2 > 1) { \ - memmove(&rtree_ctx->l2_cache[1], \ - &rtree_ctx->l2_cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * \ - (RTREE_CTX_NCACHE_L2 - 1)); \ - } \ - size_t slot = rtree_cache_direct_map(key); \ - rtree_ctx->l2_cache[0].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[0].leaf = \ - rtree_ctx->cache[slot].leaf; \ - uintptr_t leafkey = rtree_leafkey(key); \ - rtree_ctx->cache[slot].leafkey = leafkey; \ - rtree_ctx->cache[slot].leaf = leaf; \ - uintptr_t subkey = rtree_subkey(key, level); \ - return &leaf[subkey]; \ +#define RTREE_GET_LEAF(level) \ + { \ + assert(level == RTREE_HEIGHT - 1); \ + if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ + return NULL; \ + } \ + if (RTREE_CTX_NCACHE_L2 > 1) { \ + memmove(&rtree_ctx->l2_cache[1], \ + &rtree_ctx->l2_cache[0], \ + sizeof(rtree_ctx_cache_elm_t) \ + * (RTREE_CTX_NCACHE_L2 - 1)); \ + } \ + size_t slot = rtree_cache_direct_map(key); \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = rtree_ctx->cache[slot].leaf; \ + uintptr_t leafkey = rtree_leafkey(key); \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey(key, level); \ + return &leaf[subkey]; \ } if (RTREE_HEIGHT > 1) { RTREE_GET_CHILD(0) @@ -236,11 +237,11 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, RTREE_GET_CHILD(1) } if (RTREE_HEIGHT > 3) { - for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) { + for (unsigned i = 2; i < RTREE_HEIGHT - 1; i++) { RTREE_GET_CHILD(i) } } - RTREE_GET_LEAF(RTREE_HEIGHT-1) + RTREE_GET_LEAF(RTREE_HEIGHT - 1) #undef RTREE_GET_CHILD #undef RTREE_GET_LEAF not_reached(); diff --git a/src/safety_check.c b/src/safety_check.c index d3f68fbc..d052718d 100644 --- 
a/src/safety_check.c +++ b/src/safety_check.c @@ -3,20 +3,24 @@ static safety_check_abort_hook_t safety_check_abort; -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, +void +safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { - char *src = current_dealloc ? "the current pointer being freed" : - "in thread cache, possibly from previous deallocations"; + char *src = current_dealloc + ? "the current pointer being freed" + : "in thread cache, possibly from previous deallocations"; char *suggest_debug_build = config_debug ? "" : " --enable-debug or"; - safety_check_fail(": size mismatch detected (true size %zu " + safety_check_fail( + ": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " "deallocation bugs (source address: %p, %s). Suggest building with" "%s address sanitizer for debugging. Abort.\n", true_size, input_size, ptr, src, suggest_debug_build); } -void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { +void +safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } @@ -25,7 +29,8 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { * because there are cases only logging crash stack traces. */ static void -safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(const char *buf) { +safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( + const char *buf) { if (safety_check_abort == NULL) { malloc_write(buf); abort(); @@ -34,7 +39,8 @@ safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(con } } -void safety_check_fail(const char *format, ...) { +void +safety_check_fail(const char *format, ...) { char buf[MALLOC_PRINTF_BUFSIZE]; va_list ap; @@ -42,5 +48,6 @@ void safety_check_fail(const char *format, ...) 
{ malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); va_end(ap); - safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(buf); + safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( + buf); } diff --git a/src/san.c b/src/san.c index 28ea3d7c..5448c67f 100644 --- a/src/san.c +++ b/src/san.c @@ -20,8 +20,8 @@ ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; static inline void -san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, - void **addr, size_t size, bool left, bool right) { +san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, void **addr, + size_t size, bool left, bool right) { assert(!edata_guarded_get(edata)); assert(size % PAGE == 0); *addr = edata_base_get(edata); @@ -74,8 +74,8 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, : san_one_side_unguarded_sz(size_with_guards); void *guard1, *guard2, *addr; - san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, - right); + san_find_guarded_addr( + edata, &guard1, &guard2, &addr, usize, left, right); assert(edata_state_get(edata) == extent_state_active); ehooks_guard(tsdn, ehooks, guard1, guard2); @@ -109,8 +109,8 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, : san_one_side_guarded_sz(size); void *guard1, *guard2, *addr; - san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, - right); + san_find_unguarded_addr( + edata, &guard1, &guard2, &addr, size, left, right); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); @@ -130,15 +130,15 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } void -san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool left, bool right) { +san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool left, bool right) { 
san_unguard_pages_impl(tsdn, ehooks, edata, emap, left, right, /* remap */ true); } void -san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { +san_unguard_pages_pre_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_assert_not_mapped(tsdn, emap, edata); /* * We don't want to touch the emap of about to be destroyed extents, as @@ -146,7 +146,7 @@ san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * we unguard the extents to the right, because retained extents only * own their right guard page per san_bump_alloc's logic. */ - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, /* right */ true, /* remap */ false); } @@ -163,9 +163,9 @@ san_stashed_corrupted(void *ptr, size_t size) { void *first, *mid, *last; san_junk_ptr_locations(ptr, size, &first, &mid, &last); - if (*(uintptr_t *)first != uaf_detect_junk || - *(uintptr_t *)mid != uaf_detect_junk || - *(uintptr_t *)last != uaf_detect_junk) { + if (*(uintptr_t *)first != uaf_detect_junk + || *(uintptr_t *)mid != uaf_detect_junk + || *(uintptr_t *)last != uaf_detect_junk) { return true; } @@ -183,7 +183,8 @@ san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize) { assert(stashed != NULL); assert(cache_bin_nonfast_aligned(stashed)); if (unlikely(san_stashed_corrupted(stashed, usize))) { - safety_check_fail(": Write-after-free " + safety_check_fail( + ": Write-after-free " "detected on deallocated pointer %p (size %zu).\n", stashed, usize); } diff --git a/src/san_bump.c b/src/san_bump.c index 88897455..09ed18ca 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -7,30 +7,29 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/edata_cache.h" -static bool -san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, - ehooks_t *ehooks, size_t size); +static bool san_bump_grow_locked(tsdn_t *tsdn, 
san_bump_alloc_t *sba, + pac_t *pac, ehooks_t *ehooks, size_t size); edata_t * -san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size, bool zero) { assert(san_bump_enabled()); - edata_t* to_destroy; - size_t guarded_size = san_one_side_guarded_sz(size); + edata_t *to_destroy; + size_t guarded_size = san_one_side_guarded_sz(size); malloc_mutex_lock(tsdn, &sba->mtx); - if (sba->curr_reg == NULL || - edata_size_get(sba->curr_reg) < guarded_size) { + if (sba->curr_reg == NULL + || edata_size_get(sba->curr_reg) < guarded_size) { /* * If the current region can't accommodate the allocation, * try replacing it with a larger one and destroy current if the * replacement succeeds. */ to_destroy = sba->curr_reg; - bool err = san_bump_grow_locked(tsdn, sba, pac, ehooks, - guarded_size); + bool err = san_bump_grow_locked( + tsdn, sba, pac, ehooks, guarded_size); if (err) { goto label_err; } @@ -40,9 +39,9 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, assert(guarded_size <= edata_size_get(sba->curr_reg)); size_t trail_size = edata_size_get(sba->curr_reg) - guarded_size; - edata_t* edata; + edata_t *edata; if (trail_size != 0) { - edata_t* curr_reg_trail = extent_split_wrapper(tsdn, pac, + edata_t *curr_reg_trail = extent_split_wrapper(tsdn, pac, ehooks, sba->curr_reg, guarded_size, trail_size, /* holding_core_locks */ true); if (curr_reg_trail == NULL) { @@ -69,9 +68,8 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, /* right */ true, /* remap */ true); if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true, zero, - /* growing_retained */ false)) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - edata); + /* growing_retained */ false)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); return NULL; } @@ -90,9 +88,10 @@ san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t 
*ehooks, size_t size) { malloc_mutex_assert_owner(tsdn, &sba->mtx); - bool committed = false, zeroed = false; - size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE ? size : - SBA_RETAINED_ALLOC_SIZE; + bool committed = false, zeroed = false; + size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE + ? size + : SBA_RETAINED_ALLOC_SIZE; assert((alloc_size & PAGE_MASK) == 0); sba->curr_reg = extent_alloc_wrapper(tsdn, pac, ehooks, NULL, alloc_size, PAGE, zeroed, &committed, diff --git a/src/sc.c b/src/sc.c index e4a94d89..014ab95d 100644 --- a/src/sc.c +++ b/src/sc.c @@ -27,7 +27,7 @@ slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) { size_t try_slab_size = page; size_t try_nregs = try_slab_size / reg_size; size_t perfect_slab_size = 0; - bool perfect = false; + bool perfect = false; /* * This loop continues until we find the least common multiple of the * page size and size class size. Size classes are all of the form @@ -106,7 +106,7 @@ size_classes( /* Outputs that we update as we go. */ size_t lookup_maxclass = 0; size_t small_maxclass = 0; - int lg_large_minclass = 0; + int lg_large_minclass = 0; size_t large_maxclass = 0; /* Tiny size classes. */ @@ -209,7 +209,7 @@ size_classes( lg_delta++; } /* Additional outputs. */ - int nsizes = index; + int nsizes = index; unsigned lg_ceil_nsizes = lg_ceil(nsizes); /* Fill in the output data. 
*/ @@ -292,8 +292,8 @@ sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { if (!sc->bin) { break; } - size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, - sc->ndelta); + size_t reg_size = reg_size_compute( + sc->lg_base, sc->lg_delta, sc->ndelta); if (begin <= reg_size && reg_size <= end) { sc_data_update_sc_slab_size(sc, reg_size, pgs); } diff --git a/src/sec.c b/src/sec.c index 67585a71..36cd2dcc 100644 --- a/src/sec.c +++ b/src/sec.c @@ -6,12 +6,12 @@ static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void sec_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static void sec_bin_init(sec_bin_t *bin) { @@ -29,16 +29,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_large_size_classes_disabled() || - opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); + assert(!sz_large_size_classes_disabled() + || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); - size_t max_alloc = PAGE_FLOOR(opts->max_alloc); + size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; size_t sz_shards = opts->nshards * sizeof(sec_shard_t); size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); size_t sz_alloc = sz_shards + sz_bins; - void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); if (dynalloc == NULL) { return true; } @@ -74,7 +74,6 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); sec->fallback = fallback; - sec->opts = *opts; sec->npsizes = npsizes; @@ -102,7 +101,7 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { if (tsdn_null(tsdn)) { return &sec->shards[0]; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint8_t *idxp = tsd_sec_shardp_get(tsd); if (*idxp == (uint8_t)-1) { /* @@ -111,9 +110,10 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * number to store 32 bits, since we'll deliberately overflow * when we multiply by the number of shards. 
*/ - uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = - (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32); + uint64_t rand32 = prng_lg_range_u64( + tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) + >> 32); assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } @@ -157,13 +157,13 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); bool deferred_work_generated = false; - pai_dalloc_batch(tsdn, sec->fallback, &to_flush, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, sec->fallback, &to_flush, &deferred_work_generated); } static edata_t * -sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin) { +sec_shard_alloc_locked( + tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, sec_bin_t *bin) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!shard->enabled) { return NULL; @@ -186,7 +186,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); - bool deferred_work_generated = false; + bool deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, &deferred_work_generated); @@ -243,8 +243,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, assert(pszind < sec->npsizes); sec_shard_t *shard = sec_shard_pick(tsdn, sec); - sec_bin_t *bin = &shard->bins[pszind]; - bool do_batch_fill = false; + sec_bin_t *bin = &shard->bins[pszind]; + bool do_batch_fill = false; malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); @@ -258,8 +258,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, malloc_mutex_unlock(tsdn, &shard->mtx); if (edata == NULL) { if (do_batch_fill) { - edata = 
sec_batch_fill_and_alloc(tsdn, sec, shard, bin, - size, frequent_reuse); + edata = sec_batch_fill_and_alloc( + tsdn, sec, shard, bin, size, frequent_reuse); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero, /* guarded */ false, frequent_reuse, @@ -304,16 +304,16 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * rare pathways. */ bool deferred_work_generated = false; - pai_dalloc_batch(tsdn, sec->fallback, &to_flush, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, sec->fallback, &to_flush, &deferred_work_generated); } static void -sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - edata_t *edata) { +sec_shard_dalloc_and_unlock( + tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); assert(shard->bytes_cur <= sec->opts.max_bytes); - size_t size = edata_size_get(edata); + size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); /* @@ -342,13 +342,12 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static void -sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +sec_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; if (sec->opts.nshards == 0 || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata, - deferred_work_generated); + pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -357,8 +356,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata, - deferred_work_generated); + pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); } } @@ -398,12 +396,12 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, 
sec_stats_t *stats) { } void -sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, - mutex_prof_data_t *mutex_prof_data) { +sec_mutex_stats_read( + tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - malloc_mutex_prof_accum(tsdn, mutex_prof_data, - &sec->shards[i].mtx); + malloc_mutex_prof_accum( + tsdn, mutex_prof_data, &sec->shards[i].mtx); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } diff --git a/src/stats.c b/src/stats.c index b2a00319..84af3911 100644 --- a/src/stats.c +++ b/src/stats.c @@ -11,45 +11,49 @@ static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, - MUTEX_PROF_GLOBAL_MUTEXES + MUTEX_PROF_GLOBAL_MUTEXES #undef OP }; static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { #define OP(mtx) #mtx, - MUTEX_PROF_ARENA_MUTEXES + MUTEX_PROF_ARENA_MUTEXES #undef OP }; -#define CTL_GET(n, v, t) do { \ - size_t sz = sizeof(t); \ - xmallctl(n, (void *)v, &sz, NULL, 0); \ -} while (0) +#define CTL_GET(n, v, t) \ + do { \ + size_t sz = sizeof(t); \ + xmallctl(n, (void *)v, &sz, NULL, 0); \ + } while (0) -#define CTL_LEAF_PREPARE(mib, miblen, name) do { \ - assert(miblen < CTL_MAX_DEPTH); \ - size_t miblen_new = CTL_MAX_DEPTH; \ - xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ - assert(miblen_new > miblen); \ -} while (0) +#define CTL_LEAF_PREPARE(mib, miblen, name) \ + do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ + assert(miblen_new > miblen); \ + } while (0) -#define CTL_LEAF(mib, miblen, leaf, v, t) do { \ - assert(miblen < CTL_MAX_DEPTH); \ - size_t miblen_new = CTL_MAX_DEPTH; \ - size_t sz = sizeof(t); \ - xmallctlbymibname(mib, miblen, leaf, &miblen_new, (void *)v, \ - &sz, NULL, 0); \ - assert(miblen_new == miblen + 1); \ -} while (0) +#define CTL_LEAF(mib, miblen, leaf, v, t) \ + 
do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + size_t sz = sizeof(t); \ + xmallctlbymibname( \ + mib, miblen, leaf, &miblen_new, (void *)v, &sz, NULL, 0); \ + assert(miblen_new == miblen + 1); \ + } while (0) -#define CTL_MIB_GET(n, i, v, t, ind) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[(ind)] = (i); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ -} while (0) +#define CTL_MIB_GET(n, i, v, t, ind) \ + do { \ + size_t mib[CTL_MAX_DEPTH]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[(ind)] = (i); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ + } while (0) #define CTL_M1_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 1) #define CTL_M2_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 2) @@ -58,10 +62,10 @@ static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { /* Data. */ bool opt_stats_print = false; -char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; +char opt_stats_print_opts[stats_print_tot_num_options + 1] = ""; int64_t opt_stats_interval = STATS_INTERVAL_DEFAULT; -char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; +char opt_stats_interval_opts[stats_print_tot_num_options + 1] = ""; static counter_accum_t stats_interval_accumulated; /* Per thread batch accum size for stats_interval. 
*/ @@ -111,8 +115,8 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { static void mutex_stats_init_cols(emitter_row_t *row, const char *table_name, emitter_col_t *name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; @@ -128,13 +132,13 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 -#define OP(counter, counter_type, human, derived, base_counter) \ - col = &col_##counter_type[k_##counter_type]; \ - ++k_##counter_type; \ - emitter_col_init(col, row); \ - col->justify = emitter_justify_right; \ - col->width = derived ? 8 : WIDTH_##counter_type; \ - col->type = emitter_type_title; \ +#define OP(counter, counter_type, human, derived, base_counter) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = derived ? 
8 : WIDTH_##counter_type; \ + col->type = emitter_type_title; \ col->str_val = human; MUTEX_PROF_COUNTERS #undef OP @@ -146,9 +150,9 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, static void mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, emitter_col_t *col_name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, name); size_t miblen_name = miblen + 1; @@ -157,18 +161,17 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_name, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -179,9 +182,9 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, static void 
mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, emitter_col_t *col_name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, name); size_t miblen_name = miblen + 1; @@ -190,18 +193,17 @@ mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_name, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -213,7 +215,7 @@ static void mutex_stats_read_arena_bin(size_t mib[], size_t miblen, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, "mutex"); size_t miblen_mutex = miblen + 1; @@ -221,18 
+223,17 @@ mutex_stats_read_arena_bin(size_t mib[], size_t miblen, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_mutex, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_mutex, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -256,12 +257,12 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, type, human, derived, base_counter) \ - if (!derived) { \ - col = &col_##type[k_##type]; \ - ++k_##type; \ +#define OP(counter, type, human, derived, base_counter) \ + if (!derived) { \ + col = &col_##type[k_##type]; \ + ++k_##type; \ emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ - (const void *)&col->bool_val); \ + (const void *)&col->bool_val); \ } MUTEX_PROF_COUNTERS; #undef OP @@ -269,44 +270,42 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #undef EMITTER_TYPE_uint64_t } -#define COL_DECLARE(column_name) \ - emitter_col_t col_##column_name; +#define COL_DECLARE(column_name) emitter_col_t col_##column_name; 
-#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ - emitter_col_init(&col_##column_name, &row_name); \ - col_##column_name.justify = emitter_justify_##left_or_right; \ - col_##column_name.width = col_width; \ +#define COL_INIT(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ col_##column_name.type = emitter_type_##etype; -#define COL(row_name, column_name, left_or_right, col_width, etype) \ - COL_DECLARE(column_name); \ +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + COL_DECLARE(column_name); \ COL_INIT(row_name, column_name, left_or_right, col_width, etype) -#define COL_HDR_DECLARE(column_name) \ - COL_DECLARE(column_name); \ +#define COL_HDR_DECLARE(column_name) \ + COL_DECLARE(column_name); \ emitter_col_t header_##column_name; -#define COL_HDR_INIT(row_name, column_name, human, left_or_right, \ - col_width, etype) \ - COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ - emitter_col_init(&header_##column_name, &header_##row_name); \ - header_##column_name.justify = emitter_justify_##left_or_right; \ - header_##column_name.width = col_width; \ - header_##column_name.type = emitter_type_title; \ +#define COL_HDR_INIT( \ + row_name, column_name, human, left_or_right, col_width, etype) \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ header_##column_name.str_val = human ? 
human : #column_name; -#define COL_HDR(row_name, column_name, human, left_or_right, col_width, \ - etype) \ - COL_HDR_DECLARE(column_name) \ - COL_HDR_INIT(row_name, column_name, human, left_or_right, \ - col_width, etype) +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ + COL_HDR_DECLARE(column_name) \ + COL_HDR_INIT( \ + row_name, column_name, human, left_or_right, col_width, etype) JEMALLOC_COLD static void -stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, - uint64_t uptime) { - size_t page; - bool in_gap, in_gap_prev; +stats_arena_bins_print( + emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { + size_t page; + bool in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); @@ -378,17 +377,17 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; if (mutex) { - mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, - col_mutex32); - mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, - header_mutex32); + mutex_stats_init_cols( + &row, NULL, NULL, col_mutex64, col_mutex32); + mutex_stats_init_cols( + &header_row, NULL, NULL, header_mutex64, header_mutex32); } /* * We print a "bins:" header as part of the table row; we need to adjust * the header size column to compensate. 
*/ - header_size.width -=5; + header_size.width -= 5; emitter_table_printf(emitter, "bins:"); emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "bins"); @@ -408,9 +407,9 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; - size_t reg_size, slab_size, curregs; - size_t curslabs; - size_t nonfull_slabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + size_t nonfull_slabs; uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -440,8 +439,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, } if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } if (in_gap && !emitter_outputs_json(emitter)) { @@ -455,8 +454,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "curregs", &curregs, size_t); - CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nfills", &nfills, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nflushes", &nflushes, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nreslabs", &nreslabs, uint64_t); @@ -464,12 +463,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pops", &batch_pops, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", &batch_failed_pushes, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", - &batch_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes, + uint64_t); 
CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", &batch_pushed_elems, uint64_t); @@ -479,14 +478,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, } emitter_json_object_begin(emitter); - emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, - &nmalloc); - emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, - &ndalloc); - emitter_json_kv(emitter, "curregs", emitter_type_size, - &curregs); - emitter_json_kv(emitter, "nrequests", emitter_type_uint64, - &nrequests); + emitter_json_kv( + emitter, "nmalloc", emitter_type_uint64, &nmalloc); + emitter_json_kv( + emitter, "ndalloc", emitter_type_uint64, &ndalloc); + emitter_json_kv( + emitter, "curregs", emitter_type_size, &curregs); + emitter_json_kv( + emitter, "nrequests", emitter_type_uint64, &nrequests); if (prof_stats_on) { emitter_json_kv(emitter, "prof_live_requested", emitter_type_uint64, &prof_live.req_sum); @@ -497,36 +496,36 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, emitter_json_kv(emitter, "prof_accum_count", emitter_type_uint64, &prof_accum.count); } - emitter_json_kv(emitter, "nfills", emitter_type_uint64, - &nfills); - emitter_json_kv(emitter, "nflushes", emitter_type_uint64, - &nflushes); - emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, - &nreslabs); - emitter_json_kv(emitter, "curslabs", emitter_type_size, - &curslabs); + emitter_json_kv( + emitter, "nfills", emitter_type_uint64, &nfills); + emitter_json_kv( + emitter, "nflushes", emitter_type_uint64, &nflushes); + emitter_json_kv( + emitter, "nreslabs", emitter_type_uint64, &nreslabs); + emitter_json_kv( + emitter, "curslabs", emitter_type_size, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); - emitter_json_kv(emitter, "batch_pops", - emitter_type_uint64, &batch_pops); + emitter_json_kv( + emitter, "batch_pops", emitter_type_uint64, &batch_pops); emitter_json_kv(emitter, "batch_failed_pushes", emitter_type_uint64, &batch_failed_pushes); - 
emitter_json_kv(emitter, "batch_pushes", - emitter_type_uint64, &batch_pushes); + emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64, + &batch_pushes); emitter_json_kv(emitter, "batch_pushed_elems", emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); - mutex_stats_emit(emitter, NULL, col_mutex64, - col_mutex32); + mutex_stats_emit( + emitter, NULL, col_mutex64, col_mutex32); emitter_json_object_end(emitter); } emitter_json_object_end(emitter); size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) - { + char util[6]; + if (get_rate_str( + (uint64_t)curregs, (uint64_t)availregs, util)) { if (availregs == 0) { malloc_snprintf(util, sizeof(util), "1"); } else if (curregs > availregs) { @@ -550,7 +549,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_ndalloc.uint64_val = ndalloc; col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; - col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_nrequests_ps.uint64_val = rate_per_second( + nrequests, uptime); if (prof_stats_on) { col_prof_live_requested.uint64_val = prof_live.req_sum; col_prof_live_count.uint64_val = prof_live.count; @@ -574,19 +574,17 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); col_pops.uint64_val = batch_pops; - col_pops_ps.uint64_val - = rate_per_second(batch_pops, uptime); + col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime); col_failed_push.uint64_val = batch_failed_pushes; - col_failed_push_ps.uint64_val - = rate_per_second(batch_failed_pushes, uptime); + col_failed_push_ps.uint64_val = rate_per_second( + batch_failed_pushes, uptime); col_push.uint64_val = batch_pushes; - col_push_ps.uint64_val - = rate_per_second(batch_pushes, uptime); + col_push_ps.uint64_val = rate_per_second(batch_pushes, 
uptime); col_push_elem.uint64_val = batch_pushed_elems; - col_push_elem_ps.uint64_val - = rate_per_second(batch_pushed_elems, uptime); + col_push_elem_ps.uint64_val = rate_per_second( + batch_pushed_elems, uptime); /* * Note that mutex columns were initialized above, if mutex == @@ -606,7 +604,7 @@ JEMALLOC_COLD static void stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; - bool in_gap, in_gap_prev; + bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); @@ -660,8 +658,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } for (j = 0, in_gap = false; j < nlextents; j++) { - uint64_t nmalloc, ndalloc, nrequests; - size_t lextent_size, curlextents; + uint64_t nmalloc, ndalloc, nrequests; + size_t lextent_size, curlextents; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -670,20 +668,20 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); in_gap_prev = in_gap; in_gap = (nrequests == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } CTL_LEAF(arenas_lextent_mib, 3, "size", &lextent_size, size_t); - CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, - size_t); + CTL_LEAF( + stats_arenas_mib, 5, "curlextents", &curlextents, size_t); if (prof_stats_on) { prof_stats_mib[3] = j; @@ -704,8 +702,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_kv(emitter, "prof_accum_count", emitter_type_uint64, &prof_accum.count); } - emitter_json_kv(emitter, "curlextents", emitter_type_size, - &curlextents); + 
emitter_json_kv( + emitter, "curlextents", emitter_type_size, &curlextents); emitter_json_object_end(emitter); col_size.size_val = lextent_size; @@ -716,7 +714,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_ndalloc.uint64_val = ndalloc; col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; - col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_nrequests_ps.uint64_val = rate_per_second( + nrequests, uptime); if (prof_stats_on) { col_prof_live_requested.uint64_val = prof_live.req_sum; col_prof_live_count.uint64_val = prof_live.count; @@ -739,8 +738,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { JEMALLOC_COLD static void stats_arena_extents_print(emitter_t *emitter, unsigned i) { - unsigned j; - bool in_gap, in_gap_prev; + unsigned j; + bool in_gap, in_gap_prev; emitter_row_t header_row; emitter_row_init(&header_row); emitter_row_t row; @@ -777,12 +776,12 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t); CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t); CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t); - CTL_LEAF(stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, + CTL_LEAF( + stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, size_t); + CTL_LEAF( + stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, size_t); + CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes, size_t); - CTL_LEAF(stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, - size_t); - CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", - &retained_bytes, size_t); total = ndirty + nmuzzy + nretained; total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; @@ -791,20 +790,20 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { in_gap = (total == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " 
---\n"); } emitter_json_object_begin(emitter); emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy); - emitter_json_kv(emitter, "nretained", emitter_type_size, - &nretained); + emitter_json_kv( + emitter, "nretained", emitter_type_size, &nretained); - emitter_json_kv(emitter, "dirty_bytes", emitter_type_size, - &dirty_bytes); - emitter_json_kv(emitter, "muzzy_bytes", emitter_type_size, - &muzzy_bytes); + emitter_json_kv( + emitter, "dirty_bytes", emitter_type_size, &dirty_bytes); + emitter_json_kv( + emitter, "muzzy_bytes", emitter_type_size, &muzzy_bytes); emitter_json_kv(emitter, "retained_bytes", emitter_type_size, &retained_bytes); emitter_json_object_end(emitter); @@ -839,8 +838,8 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, - uint64_t uptime) { +stats_arena_hpa_shard_counters_print( + emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs; size_t nactive; size_t ndirty; @@ -860,39 +859,36 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t nhugify_failures; uint64_t ndehugifies; - CTL_M2_GET("stats.arenas.0.hpa_shard.npageslabs", - i, &npageslabs, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", - i, &nactive, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", - i, &ndirty, size_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.npageslabs", i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", i, &ndirty, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + 
CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", - i, &npurge_passes, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", - i, &npurges, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", - i, &nhugifies, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", - i, &nhugify_failures, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", - i, &ndehugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, + uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", i, &npurges, uint64_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.nhugifies", i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", i, + &nhugify_failures, uint64_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); emitter_table_printf(emitter, "HPA shard stats:\n" @@ -900,56 +896,55 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, " Active pages: %zu (%zu huge, %zu nonhuge)\n" " Dirty pages: %zu (%zu huge, %zu nonhuge)\n" " 
Retained pages: %zu\n" - " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purge passes: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Hugify failures: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 + " / sec)\n" "\n", - npageslabs, npageslabs_huge, npageslabs_nonhuge, - nactive, nactive_huge, nactive_nonhuge, - ndirty, ndirty_huge, ndirty_nonhuge, - nretained_nonhuge, - npurge_passes, rate_per_second(npurge_passes, uptime), - npurges, rate_per_second(npurges, uptime), - nhugifies, rate_per_second(nhugifies, uptime), - nhugify_failures, rate_per_second(nhugify_failures, uptime), - ndehugifies, rate_per_second(ndehugifies, uptime)); + npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive, + nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge, + nretained_nonhuge, npurge_passes, + rate_per_second(npurge_passes, uptime), npurges, + rate_per_second(npurges, uptime), nhugifies, + rate_per_second(nhugifies, uptime), nhugify_failures, + rate_per_second(nhugify_failures, uptime), ndehugifies, + rate_per_second(ndehugifies, uptime)); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, - &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, - &nactive); - emitter_json_kv(emitter, "ndirty", emitter_type_size, - &ndirty); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); + emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); - emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, - &npurge_passes); - emitter_json_kv(emitter, "npurges", emitter_type_uint64, - &npurges); - emitter_json_kv(emitter, 
"nhugifies", emitter_type_uint64, - &nhugifies); + emitter_json_kv( + emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, &nhugifies); emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64, &nhugify_failures); - emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, - &ndehugifies); + emitter_json_kv( + emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); emitter_json_object_kv_begin(emitter, "slabs"); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, &nretained_nonhuge); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, - &ndirty_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_object_end(emitter); /* End "slabs" */ } @@ -970,19 +965,19 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { size_t nretained_nonhuge; /* Full slab stats. 
*/ - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; @@ -992,40 +987,38 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, - nactive_huge, nactive_nonhuge, - ndirty_huge, ndirty_nonhuge, - nretained_nonhuge); + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, nretained_nonhuge); emitter_json_object_kv_begin(emitter, "full_slabs"); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "nactive_huge", 
emitter_type_size, - &nactive_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ /* Next, empty slab stats. */ - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); 
nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; @@ -1035,24 +1028,22 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, - nactive_huge, nactive_nonhuge, - ndirty_huge, ndirty_nonhuge, - nretained_nonhuge); + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, nretained_nonhuge); emitter_json_object_kv_begin(emitter, "empty_slabs"); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ /* Last, nonfull slab stats. 
*/ @@ -1080,25 +1071,25 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", &npageslabs_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", - &nactive_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", - &ndirty_huge, size_t); + CTL_LEAF( + stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); + CTL_LEAF( + stats_arenas_mib, 6, "ndirty_huge", &ndirty_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", - &ndirty_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", &ndirty_nonhuge, + size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } col_size.size_val = sz_pind2sz(j); @@ -1117,12 +1108,12 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_object_begin(emitter); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, - &ndirty_huge); - emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, - &npageslabs_nonhuge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", + emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, @@ -1146,7 +1137,8 @@ 
stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } static void -stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { +stats_arena_mutexes_print( + emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; emitter_col_t col_name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; @@ -1164,11 +1156,11 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); - mutex_stats_read_arena(stats_arenas_mib, 4, name, &col_name, - col64, col32, uptime); + mutex_stats_read_arena( + stats_arenas_mib, 4, name, &col_name, col64, col32, uptime); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); /* Close the mutex dict. */ } @@ -1179,29 +1171,30 @@ JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { - char name[ARENA_NAME_LEN]; - char *namep = name; - unsigned nthreads; + char name[ARENA_NAME_LEN]; + char *namep = name; + unsigned nthreads; const char *dss; - ssize_t dirty_decay_ms, muzzy_decay_ms; - size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident, metadata_edata, metadata_rtree, + ssize_t dirty_decay_ms, muzzy_decay_ms; + size_t page, pactive, pdirty, pmuzzy, mapped, retained; + size_t base, internal, resident, metadata_edata, metadata_rtree, metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; - size_t small_allocated; + size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, small_nflushes; - size_t large_allocated; + size_t large_allocated; uint64_t large_nmalloc, 
large_ndalloc, large_nrequests, large_nfills, large_nflushes; - size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; + size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); if (i != MALLCTL_ARENAS_ALL && i != MALLCTL_ARENAS_DESTROYED) { CTL_M1_GET("arena.0.name", i, (void *)&namep, const char *); - emitter_kv(emitter, "name", "name", emitter_type_string, &namep); + emitter_kv( + emitter, "name", "name", emitter_type_string, &namep); } CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); @@ -1209,55 +1202,55 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, - &uptime); + emitter_kv( + emitter, "uptime_ns", "uptime", emitter_type_uint64, &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); emitter_kv(emitter, "dss", "dss allocation precedence", emitter_type_string, &dss); - CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, - ssize_t); - CTL_M2_GET("stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, - ssize_t); + CTL_M2_GET( + "stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); + CTL_M2_GET( + "stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, ssize_t); CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.pmuzzy", i, &pmuzzy, size_t); CTL_M2_GET("stats.arenas.0.dirty_npurge", i, &dirty_npurge, uint64_t); - CTL_M2_GET("stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, - uint64_t); + CTL_M2_GET( + "stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.dirty_purged", i, &dirty_purged, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_npurge", i, &muzzy_npurge, uint64_t); - CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, - uint64_t); + 
CTL_M2_GET( + "stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); emitter_row_t decay_row; emitter_row_init(&decay_row); /* JSON-style emission. */ - emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, - &dirty_decay_ms); - emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, - &muzzy_decay_ms); + emitter_json_kv( + emitter, "dirty_decay_ms", emitter_type_ssize, &dirty_decay_ms); + emitter_json_kv( + emitter, "muzzy_decay_ms", emitter_type_ssize, &muzzy_decay_ms); emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); - emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, - &dirty_npurge); - emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, - &dirty_nmadvise); - emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, - &dirty_purged); + emitter_json_kv( + emitter, "dirty_npurge", emitter_type_uint64, &dirty_npurge); + emitter_json_kv( + emitter, "dirty_nmadvise", emitter_type_uint64, &dirty_nmadvise); + emitter_json_kv( + emitter, "dirty_purged", emitter_type_uint64, &dirty_purged); - emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, - &muzzy_npurge); - emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, - &muzzy_nmadvise); - emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, - &muzzy_purged); + emitter_json_kv( + emitter, "muzzy_npurge", emitter_type_uint64, &muzzy_npurge); + emitter_json_kv( + emitter, "muzzy_nmadvise", emitter_type_uint64, &muzzy_nmadvise); + emitter_json_kv( + emitter, "muzzy_purged", emitter_type_uint64, &muzzy_purged); /* Table-style emission. 
*/ COL(decay_row, decay_type, right, 9, title); @@ -1374,12 +1367,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nfills_ps.type = emitter_type_uint64; col_count_nflushes_ps.type = emitter_type_uint64; -#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ - CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ - &small_or_large##_##name, valtype##_t); \ - emitter_json_kv(emitter, #name, emitter_type_##valtype, \ - &small_or_large##_##name); \ - col_count_##name.type = emitter_type_##valtype; \ +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv( \ + emitter, #name, emitter_type_##valtype, &small_or_large##_##name); \ + col_count_##name.type = emitter_type_##valtype; \ col_count_##name.valtype##_val = small_or_large##_##name; emitter_json_object_kv_begin(emitter, "small"); @@ -1387,20 +1380,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, allocated, size) GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); + 
col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". */ @@ -1410,20 +1403,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, allocated, size) GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". 
*/ @@ -1438,16 +1431,16 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nrequests.uint64_val = small_nrequests + large_nrequests; col_count_nfills.uint64_val = small_nfills + large_nfills; col_count_nflushes.uint64_val = small_nflushes + large_nflushes; - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; @@ -1475,11 +1468,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, mem_count_val.size_val = pactive * page; emitter_table_row(emitter, &mem_count_row); -#define GET_AND_EMIT_MEM_STAT(stat) \ - CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ - emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ - mem_count_title.str_val = #stat":"; \ - mem_count_val.size_val = stat; \ +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0." 
#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat ":"; \ + mem_count_val.size_val = stat; \ emitter_table_row(emitter, &mem_count_row); GET_AND_EMIT_MEM_STAT(mapped) @@ -1517,13 +1510,13 @@ JEMALLOC_COLD static void stats_general_print(emitter_t *emitter) { const char *cpv; - bool bv, bv2; - unsigned uv; - uint32_t u32v; - uint64_t u64v; - int64_t i64v; - ssize_t ssv, ssv2; - size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; + bool bv, bv2; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + int64_t i64v; + ssize_t ssv, ssv2; + size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); @@ -1539,11 +1532,11 @@ stats_general_print(emitter_t *emitter) { /* config. */ emitter_dict_begin(emitter, "config", "Build-time option settings"); -#define CONFIG_WRITE_BOOL(name) \ - do { \ - CTL_GET("config."#name, &bv, bool); \ - emitter_kv(emitter, #name, "config."#name, \ - emitter_type_bool, &bv); \ +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config." #name, &bv, bool); \ + emitter_kv( \ + emitter, #name, "config." #name, emitter_type_bool, &bv); \ } while (0) CONFIG_WRITE_BOOL(cache_oblivious); @@ -1565,45 +1558,33 @@ stats_general_print(emitter_t *emitter) { emitter_dict_end(emitter); /* Close "config" dict. */ /* opt. */ -#define OPT_WRITE(name, var, size, emitter_type) \ - if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ - 0) { \ - emitter_kv(emitter, name, "opt."name, emitter_type, \ - &var); \ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt." name, (void *)&var, &size, NULL, 0) == 0) { \ + emitter_kv(emitter, name, "opt." 
name, emitter_type, &var); \ } -#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ - altname) \ - if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ - 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ - == 0) { \ - emitter_kv_note(emitter, name, "opt."name, \ - emitter_type, &var1, altname, emitter_type, \ - &var2); \ +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, altname) \ + if (je_mallctl("opt." name, (void *)&var1, &size, NULL, 0) == 0 \ + && je_mallctl(altname, (void *)&var2, &size, NULL, 0) == 0) { \ + emitter_kv_note(emitter, name, "opt." name, emitter_type, \ + &var1, altname, emitter_type, &var2); \ } #define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) -#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) -#define OPT_WRITE_UNSIGNED(name) \ - OPT_WRITE(name, uv, usz, emitter_type_unsigned) +#define OPT_WRITE_UNSIGNED(name) OPT_WRITE(name, uv, usz, emitter_type_unsigned) -#define OPT_WRITE_INT64(name) \ - OPT_WRITE(name, i64v, i64sz, emitter_type_int64) -#define OPT_WRITE_UINT64(name) \ - OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) +#define OPT_WRITE_INT64(name) OPT_WRITE(name, i64v, i64sz, emitter_type_int64) +#define OPT_WRITE_UINT64(name) OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) -#define OPT_WRITE_SIZE_T(name) \ - OPT_WRITE(name, sv, ssz, emitter_type_size) -#define OPT_WRITE_SSIZE_T(name) \ - OPT_WRITE(name, ssv, sssz, emitter_type_ssize) -#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ - OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ - altname) +#define OPT_WRITE_SIZE_T(name) OPT_WRITE(name, sv, ssz, emitter_type_size) +#define OPT_WRITE_SSIZE_T(name) OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, altname) -#define 
OPT_WRITE_CHAR_P(name) \ - OPT_WRITE(name, cpv, cpsz, emitter_type_string) +#define OPT_WRITE_CHAR_P(name) OPT_WRITE(name, cpv, cpsz, emitter_type_string) emitter_dict_begin(emitter, "opt", "Run-time option settings"); @@ -1623,21 +1604,24 @@ stats_general_print(emitter_t *emitter) { * Note: The outputs are strictly ordered by priorities (low -> high). * */ -#define MALLOC_CONF_WRITE(name, message) \ - if (je_mallctl("opt.malloc_conf."name, (void *)&cpv, &cpsz, NULL, 0) != \ - 0) { \ - cpv = ""; \ - } \ - emitter_kv(emitter, name, message, emitter_type_string, &cpv); +#define MALLOC_CONF_WRITE(name, message) \ + if (je_mallctl("opt.malloc_conf." name, (void *)&cpv, &cpsz, NULL, 0) \ + != 0) { \ + cpv = ""; \ + } \ + emitter_kv(emitter, name, message, emitter_type_string, &cpv); MALLOC_CONF_WRITE("global_var", "Global variable malloc_conf"); MALLOC_CONF_WRITE("symlink", "Symbolic link malloc.conf"); MALLOC_CONF_WRITE("env_var", "Environment variable MALLOC_CONF"); /* As this config is unofficial, skip the output if it's NULL */ - if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", - (void *)&cpv, &cpsz, NULL, 0) == 0) { - emitter_kv(emitter, "global_var_2_conf_harder", "Global " - "variable malloc_conf_2_conf_harder", emitter_type_string, &cpv); + if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", (void *)&cpv, + &cpsz, NULL, 0) + == 0) { + emitter_kv(emitter, "global_var_2_conf_harder", + "Global " + "variable malloc_conf_2_conf_harder", + emitter_type_string, &cpv); } #undef MALLOC_CONF_WRITE @@ -1712,8 +1696,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("prof_bt_max") OPT_WRITE_CHAR_P("prof_prefix") OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") - OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", - "prof.thread_active_init") + OPT_WRITE_BOOL_MUTABLE( + "prof_thread_active_init", "prof.thread_active_init") OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") OPT_WRITE_BOOL("prof_accum") 
OPT_WRITE_SSIZE_T("lg_prof_interval") @@ -1751,12 +1735,12 @@ stats_general_print(emitter_t *emitter) { "prof.thread_active_init", emitter_type_bool, &bv); CTL_GET("prof.active", &bv, bool); - emitter_kv(emitter, "active", "prof.active", emitter_type_bool, - &bv); + emitter_kv( + emitter, "active", "prof.active", emitter_type_bool, &bv); CTL_GET("prof.gdump", &bv, bool); - emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool, - &bv); + emitter_kv( + emitter, "gdump", "prof.gdump", emitter_type_bool, &bv); CTL_GET("prof.interval", &u64v, uint64_t); emitter_kv(emitter, "interval", "prof.interval", @@ -1796,8 +1780,8 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); CTL_GET("arenas.hugepage", &sv, size_t); - emitter_kv(emitter, "hugepage", "Hugepage size", emitter_type_size, - &sv); + emitter_kv( + emitter, "hugepage", "Hugepage size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { emitter_kv(emitter, "tcache_max", @@ -1827,20 +1811,20 @@ stats_general_print(emitter_t *emitter) { emitter_json_object_begin(emitter); CTL_LEAF(arenas_bin_mib, 3, "size", &sv, size_t); - emitter_json_kv(emitter, "size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "size", emitter_type_size, &sv); CTL_LEAF(arenas_bin_mib, 3, "nregs", &u32v, uint32_t); - emitter_json_kv(emitter, "nregs", emitter_type_uint32, - &u32v); + emitter_json_kv( + emitter, "nregs", emitter_type_uint32, &u32v); CTL_LEAF(arenas_bin_mib, 3, "slab_size", &sv, size_t); - emitter_json_kv(emitter, "slab_size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "slab_size", emitter_type_size, &sv); CTL_LEAF(arenas_bin_mib, 3, "nshards", &u32v, uint32_t); - emitter_json_kv(emitter, "nshards", emitter_type_uint32, - &u32v); + emitter_json_kv( + emitter, "nshards", emitter_type_uint32, &u32v); emitter_json_object_end(emitter); } @@ -1861,8 +1845,8 @@ stats_general_print(emitter_t *emitter) { 
emitter_json_object_begin(emitter); CTL_LEAF(arenas_lextent_mib, 3, "size", &sv, size_t); - emitter_json_kv(emitter, "size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "size", emitter_type_size, &sv); emitter_json_object_end(emitter); } @@ -1882,8 +1866,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, */ size_t allocated, active, metadata, metadata_edata, metadata_rtree, metadata_thp, resident, mapped, retained; - size_t num_background_threads; - size_t zero_reallocs; + size_t num_background_threads; + size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); @@ -1916,23 +1900,24 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); emitter_json_kv(emitter, "active", emitter_type_size, &active); emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); - emitter_json_kv(emitter, "metadata_edata", emitter_type_size, - &metadata_edata); - emitter_json_kv(emitter, "metadata_rtree", emitter_type_size, - &metadata_rtree); - emitter_json_kv(emitter, "metadata_thp", emitter_type_size, - &metadata_thp); + emitter_json_kv( + emitter, "metadata_edata", emitter_type_size, &metadata_edata); + emitter_json_kv( + emitter, "metadata_rtree", emitter_type_size, &metadata_rtree); + emitter_json_kv( + emitter, "metadata_thp", emitter_type_size, &metadata_thp); emitter_json_kv(emitter, "resident", emitter_type_size, &resident); emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); emitter_json_kv(emitter, "retained", emitter_type_size, &retained); - emitter_json_kv(emitter, "zero_reallocs", emitter_type_size, - &zero_reallocs); + emitter_json_kv( + emitter, "zero_reallocs", emitter_type_size, &zero_reallocs); - emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + emitter_table_printf(emitter, + "Allocated: %zu, active: %zu, " "metadata: %zu (n_thp %zu, 
edata %zu, rtree %zu), resident: %zu, " - "mapped: %zu, retained: %zu\n", allocated, active, metadata, - metadata_thp, metadata_edata, metadata_rtree, resident, mapped, - retained); + "mapped: %zu, retained: %zu\n", + allocated, active, metadata, metadata_thp, metadata_edata, + metadata_rtree, resident, mapped, retained); /* Strange behaviors */ emitter_table_printf(emitter, @@ -1940,16 +1925,17 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Background thread stats. */ emitter_json_object_kv_begin(emitter, "background_thread"); - emitter_json_kv(emitter, "num_threads", emitter_type_size, - &num_background_threads); + emitter_json_kv( + emitter, "num_threads", emitter_type_size, &num_background_threads); emitter_json_kv(emitter, "num_runs", emitter_type_uint64, &background_thread_num_runs); emitter_json_kv(emitter, "run_interval", emitter_type_uint64, &background_thread_run_interval); emitter_json_object_end(emitter); /* Close "background_thread". */ - emitter_table_printf(emitter, "Background threads: %zu, " - "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + emitter_table_printf(emitter, + "Background threads: %zu, " + "num_runs: %" FMTu64 ", run_interval: %" FMTu64 " ns\n", num_background_threads, background_thread_num_runs, background_thread_run_interval); @@ -1958,7 +1944,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_col_t name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; emitter_col_t col32[mutex_prof_num_uint32_t_counters]; - uint64_t uptime; + uint64_t uptime; emitter_row_init(&row); mutex_stats_init_cols(&row, "", &name, col64, col32); @@ -1973,7 +1959,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_read_global(stats_mutexes_mib, 2, global_mutex_names[i], &name, col64, col32, uptime); - emitter_json_object_kv_begin(emitter, global_mutex_names[i]); + emitter_json_object_kv_begin( + 
emitter, global_mutex_names[i]); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); } @@ -1993,23 +1980,23 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t miblen = sizeof(mib) / sizeof(size_t); size_t sz; VARIABLE_ARRAY_UNSAFE(bool, initialized, narenas); - bool destroyed_initialized; + bool destroyed_initialized; unsigned i, ninitialized; xmallctlnametomib("arena.0.initialized", mib, &miblen); for (i = ninitialized = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); + xmallctlbymib( + mib, miblen, &initialized[i], &sz, NULL, 0); if (initialized[i]) { ninitialized++; } } mib[1] = MALLCTL_ARENAS_DESTROYED; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, - NULL, 0); + xmallctlbymib( + mib, miblen, &destroyed_initialized, &sz, NULL, 0); /* Merged stats. */ if (merged && (ninitialized > 1 || !unmerged)) { @@ -2024,12 +2011,13 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Destroyed stats. */ if (destroyed_initialized && destroyed) { /* Print destroyed arena stats. */ - emitter_table_printf(emitter, - "Destroyed arenas stats:\n"); + emitter_table_printf( + emitter, "Destroyed arenas stats:\n"); emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, bins, large, mutex, extents, hpa); - emitter_json_object_end(emitter); /* Close "destroyed". */ + emitter_json_object_end( + emitter); /* Close "destroyed". */ } /* Unmerged stats. 
*/ @@ -2039,8 +2027,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, char arena_ind_str[20]; malloc_snprintf(arena_ind_str, sizeof(arena_ind_str), "%u", i); - emitter_json_object_kv_begin(emitter, - arena_ind_str); + emitter_json_object_kv_begin( + emitter, arena_ind_str); emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, @@ -2056,9 +2044,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { - int err; + int err; uint64_t epoch; - size_t u64sz; + size_t u64sz; #define OPTION(o, v, d, s) bool v = d; STATS_PRINT_OPTIONS #undef OPTION @@ -2072,15 +2060,17 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, - sizeof(uint64_t)); + err = je_mallctl( + "epoch", (void *)&epoch, &u64sz, (void *)&epoch, sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { - malloc_write(": Memory allocation failure in " + malloc_write( + ": Memory allocation failure in " "mallctl(\"epoch\", ...)\n"); return; } - malloc_write(": Failure in mallctl(\"epoch\", " + malloc_write( + ": Failure in mallctl(\"epoch\", " "...)\n"); abort(); } @@ -2088,7 +2078,10 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { if (opts != NULL) { for (unsigned i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { -#define OPTION(o, v, d, s) case o: v = s; break; +#define OPTION(o, v, d, s) \ + case o: \ + v = s; \ + break; STATS_PRINT_OPTIONS #undef OPTION default:; @@ -2098,8 +2091,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { emitter_t emitter; emitter_init(&emitter, - json ? emitter_output_json_compact : emitter_output_table, - write_cb, cbopaque); + json ? 
emitter_output_json_compact : emitter_output_table, write_cb, + cbopaque); emitter_begin(&emitter); emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); emitter_json_object_kv_begin(&emitter, "jemalloc"); @@ -2108,8 +2101,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { stats_general_print(&emitter); } if (config_stats) { - stats_print_helper(&emitter, merged, destroyed, unmerged, - bins, large, mutex, extents, hpa); + stats_print_helper(&emitter, merged, destroyed, unmerged, bins, + large, mutex, extents, hpa); } emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ @@ -2135,8 +2128,8 @@ stats_interval_event_handler(tsd_t *tsd) { uint64_t elapsed = last_event - last_sample_event; assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); - if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, - elapsed)) { + if (counter_accum( + tsd_tsdn(tsd), &stats_interval_accumulated, elapsed)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } } @@ -2147,10 +2140,10 @@ stats_interval_enabled(void) { } te_base_cb_t stats_interval_te_handler = { - .enabled = &stats_interval_enabled, - .new_event_wait = &stats_interval_new_event_wait, - .postponed_event_wait = &stats_interval_postponed_event_wait, - .event_handler = &stats_interval_event_handler, + .enabled = &stats_interval_enabled, + .new_event_wait = &stats_interval_new_event_wait, + .postponed_event_wait = &stats_interval_postponed_event_wait, + .event_handler = &stats_interval_event_handler, }; bool @@ -2160,12 +2153,12 @@ stats_boot(void) { assert(opt_stats_interval == -1); stats_interval = 0; stats_interval_accum_batch = 0; - } else{ + } else { /* See comments in stats.h */ - stats_interval = (opt_stats_interval > 0) ? - opt_stats_interval : 1; - uint64_t batch = stats_interval >> - STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; + stats_interval = (opt_stats_interval > 0) ? 
opt_stats_interval + : 1; + uint64_t batch = stats_interval + >> STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; if (batch > STATS_INTERVAL_ACCUM_BATCH_MAX) { batch = STATS_INTERVAL_ACCUM_BATCH_MAX; } else if (batch == 0) { diff --git a/src/sz.c b/src/sz.c index 89def9d5..4a4c057d 100644 --- a/src/sz.c +++ b/src/sz.c @@ -3,12 +3,12 @@ #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) -size_t sz_pind2sz_tab[SC_NPSIZES+1]; +size_t sz_pind2sz_tab[SC_NPSIZES + 1]; size_t sz_large_pad; size_t sz_psz_quantize_floor(size_t size) { - size_t ret; + size_t ret; pszind_t pind; assert(size > 0); @@ -47,8 +47,8 @@ sz_psz_quantize_ceil(size_t size) { * search would potentially find sufficiently aligned available * memory somewhere lower. */ - ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + - sz_large_pad; + ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + + sz_large_pad; } return ret; } @@ -93,12 +93,12 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; size_t dst_ind = 0; for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; - sc_ind++) { + sc_ind++) { const sc_t *sc = &sc_data->sc[sc_ind]; - size_t sz = (ZU(1) << sc->lg_base) + size_t sz = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) - >> SC_LG_TINY_MIN); + >> SC_LG_TINY_MIN); for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { assert(sc_ind < 1 << (sizeof(uint8_t) * 8)); sz_size2index_tab[dst_ind] = (uint8_t)sc_ind; diff --git a/src/tcache.c b/src/tcache.c index 0154403d..44a96841 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -29,7 +29,7 @@ unsigned opt_tcache_nslots_large = 20; * This is bounded by some other constraints as well, like the fact that it * must be even, must be less than opt_tcache_nslots_small_max, etc.. 
*/ -ssize_t opt_lg_tcache_nslots_mul = 1; +ssize_t opt_lg_tcache_nslots_mul = 1; /* * Number of allocation bytes between tcache incremental GCs. Again, this @@ -63,13 +63,13 @@ unsigned opt_lg_tcache_flush_large_div = 1; * is only used to initialize tcache_nbins in the per-thread tcache. * Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_tcache_nbins; +unsigned global_do_not_change_tcache_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it * will not affect threads already launched. */ -size_t global_do_not_change_tcache_maxclass; +size_t global_do_not_change_tcache_maxclass; /* * Default bin info for each bin. Will be initialized in malloc_conf_init @@ -83,16 +83,16 @@ static cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; */ static bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; -tcaches_t *tcaches; +tcaches_t *tcaches; /* Index of first element within tcaches that has never been used. */ -static unsigned tcaches_past; +static unsigned tcaches_past; /* Head of singly linked list tracking available tcaches elements. */ -static tcaches_t *tcaches_avail; +static tcaches_t *tcaches_avail; /* Protects tcaches{,_past,_avail}. */ -static malloc_mutex_t tcaches_mtx; +static malloc_mutex_t tcaches_mtx; /******************************************************************************/ @@ -180,8 +180,8 @@ tcache_nfill_small_burst_reset(tcache_slow_t *tcache_slow, szind_t szind) { * count should be decreased, i.e. lg_div(base) should be increased. 
*/ static inline void -tcache_nfill_small_gc_update(tcache_slow_t *tcache_slow, szind_t szind, - cache_bin_sz_t limit) { +tcache_nfill_small_gc_update( + tcache_slow_t *tcache_slow, szind_t szind, cache_bin_sz_t limit) { cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); if (!limit && ctl->base > 1) { /* @@ -214,16 +214,17 @@ tcache_gc_item_delay_compute(szind_t szind) { } static inline void * -tcache_gc_small_heuristic_addr_get(tsd_t *tsd, tcache_slow_t *tcache_slow, - szind_t szind) { +tcache_gc_small_heuristic_addr_get( + tsd_t *tsd, tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); tsdn_t *tsdn = tsd_tsdn(tsd); - bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); assert(bin != NULL); malloc_mutex_lock(tsdn, &bin->lock); - edata_t *slab = (bin->slabcur == NULL) ? - edata_heap_first(&bin->slabs_nonfull) : bin->slabcur; + edata_t *slab = (bin->slabcur == NULL) + ? edata_heap_first(&bin->slabs_nonfull) + : bin->slabcur; assert(slab != NULL || edata_heap_empty(&bin->slabs_nonfull)); void *ret = (slab != NULL) ? edata_addr_get(slab) : NULL; assert(ret != NULL || slab == NULL); @@ -250,21 +251,23 @@ tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, * starting from 2M, so that the total number of disjoint virtual * memory ranges retained by each shard is limited. */ - uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) ? - ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) : 0; - uintptr_t neighbor_max = ((uintptr_t)addr < (UINTPTR_MAX - - TCACHE_GC_NEIGHBOR_LIMIT)) ? ((uintptr_t)addr + - TCACHE_GC_NEIGHBOR_LIMIT) : UINTPTR_MAX; + uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) + ? ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) + : 0; + uintptr_t neighbor_max = ((uintptr_t)addr + < (UINTPTR_MAX - TCACHE_GC_NEIGHBOR_LIMIT)) + ? 
((uintptr_t)addr + TCACHE_GC_NEIGHBOR_LIMIT) + : UINTPTR_MAX; /* Scan the entire bin to count the number of remote pointers. */ - void **head = cache_bin->stack_head; + void **head = cache_bin->stack_head; cache_bin_sz_t n_remote_slab = 0, n_remote_neighbor = 0; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); for (void **cur = head; cur < head + ncached; cur++) { - n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, - slab_min, slab_max); - n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, - neighbor_min, neighbor_max); + n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote( + *cur, slab_min, slab_max); + n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote( + *cur, neighbor_min, neighbor_max); } /* * Note: since slab size is dynamic and can be larger than 2M, i.e. @@ -295,8 +298,8 @@ tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, /* Shuffle the ptrs in the bin to put the remote pointers at the bottom. */ static inline void tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, - uintptr_t addr_min, uintptr_t addr_max) { - void **swap = NULL; + uintptr_t addr_min, uintptr_t addr_max) { + void **swap = NULL; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t ntop = ncached - nremote, cnt = 0; assert(ntop > 0 && ntop < ncached); @@ -320,13 +323,15 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, */ if (swap != NULL) { assert(swap < cur); - assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + assert(tcache_gc_is_addr_remote( + *swap, addr_min, addr_max)); void *tmp = *cur; *cur = *swap; *swap = tmp; swap++; assert(swap <= cur); - assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + assert(tcache_gc_is_addr_remote( + *swap, addr_min, addr_max)); } continue; } else if (swap == NULL) { @@ -344,8 +349,8 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, break; } if 
(!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { - assert(tcache_gc_is_addr_remote(*(head + cnt), addr_min, - addr_max)); + assert(tcache_gc_is_addr_remote( + *(head + cnt), addr_min, addr_max)); void *tmp = *cur; *cur = *(head + cnt); *(head + cnt) = tmp; @@ -356,15 +361,17 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, /* Sanity check to make sure the shuffle is done correctly. */ for (void **cur = head; cur < head + ncached; cur++) { assert(*cur != NULL); - assert(((cur < head + ntop) && !tcache_gc_is_addr_remote( - *cur, addr_min, addr_max)) || ((cur >= head + ntop) && - tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); + assert( + ((cur < head + ntop) + && !tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) + || ((cur >= head + ntop) + && tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); } } static bool -tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_gc_small( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* * Aim to flush 3/4 of items below low-water, with remote pointers being * prioritized for flushing. @@ -403,24 +410,26 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (nflush < tcache_slow->bin_flush_delay_items[szind]) { /* Workaround for a conversion warning. */ uint8_t nflush_uint8 = (uint8_t)nflush; - assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == - sizeof(nflush_uint8)); - tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; + assert(sizeof(tcache_slow->bin_flush_delay_items[0]) + == sizeof(nflush_uint8)); + tcache_slow->bin_flush_delay_items[szind] -= + nflush_uint8; return false; } - tcache_slow->bin_flush_delay_items[szind] - = tcache_gc_item_delay_compute(szind); + tcache_slow->bin_flush_delay_items[szind] = + tcache_gc_item_delay_compute(szind); goto label_flush; } /* Directly goto the flush path when the entire bin needs to be flushed. 
*/ - if ( nflush == ncached) { + if (nflush == ncached) { goto label_flush; } /* Query arena binshard to get heuristic locality info. */ - void *addr = tcache_gc_small_heuristic_addr_get(tsd, tcache_slow, szind); + void *addr = tcache_gc_small_heuristic_addr_get( + tsd, tcache_slow, szind); if (addr == NULL) { goto label_flush; } @@ -429,9 +438,9 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Use the queried addr above to get the number of remote ptrs in the * bin, and the min/max of the local addr range. */ - uintptr_t addr_min, addr_max; - cache_bin_sz_t nremote = tcache_gc_small_nremote_get(cache_bin, addr, - &addr_min, &addr_max, szind, nflush); + uintptr_t addr_min, addr_max; + cache_bin_sz_t nremote = tcache_gc_small_nremote_get( + cache_bin, addr, &addr_min, &addr_max, szind, nflush); /* * Update the nflush to the larger value between the intended flush count @@ -448,7 +457,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * also be flushed. */ assert(nflush < ncached || nremote == ncached); - if (nremote == 0 || nremote == ncached) { + if (nremote == 0 || nremote == ncached) { goto label_flush; } @@ -467,14 +476,14 @@ label_flush: return false; } assert(nflush <= ncached); - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, - (unsigned)(ncached - nflush)); + tcache_bin_flush_small( + tsd, tcache, cache_bin, szind, (unsigned)(ncached - nflush)); return true; } static bool -tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_gc_large( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* * Like the small GC, flush 3/4 of untouched items. However, simply flush * the bottom nflush items, without any locality check. 
@@ -486,16 +495,16 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (low_water == 0) { return false; } - unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) - - low_water + (low_water >> 2)); + unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) + - low_water + (low_water >> 2)); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, nrem); return true; } /* Try to gc one bin by szind, return true if there is item flushed. */ static bool -tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_try_gc_bin( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { assert(tcache != NULL); cache_bin_t *cache_bin = &tcache->bins[szind]; if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { @@ -504,8 +513,8 @@ tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, bool is_small = (szind < SC_NBINS); tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - bool ret = is_small ? tcache_gc_small(tsd, tcache_slow, tcache, szind) : - tcache_gc_large(tsd, tcache_slow, tcache, szind); + bool ret = is_small ? tcache_gc_small(tsd, tcache_slow, tcache, szind) + : tcache_gc_large(tsd, tcache_slow, tcache, szind); cache_bin_low_water_set(cache_bin); return ret; } @@ -536,8 +545,8 @@ tcache_gc_event(tsd_t *tsd) { nstime_update(&now); assert(nstime_compare(&now, &tcache_slow->last_gc_time) >= 0); - if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) < - TCACHE_GC_INTERVAL_NS) { + if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) + < TCACHE_GC_INTERVAL_NS) { // time interval is too short, skip this event. return; } @@ -546,13 +555,15 @@ tcache_gc_event(tsd_t *tsd) { unsigned gc_small_nbins = 0, gc_large_nbins = 0; unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - unsigned small_nbins = tcache_nbins > SC_NBINS ? 
SC_NBINS : tcache_nbins; - szind_t szind_small = tcache_slow->next_gc_bin_small; - szind_t szind_large = tcache_slow->next_gc_bin_large; + unsigned small_nbins = tcache_nbins > SC_NBINS ? SC_NBINS + : tcache_nbins; + szind_t szind_small = tcache_slow->next_gc_bin_small; + szind_t szind_large = tcache_slow->next_gc_bin_large; /* Flush at most TCACHE_GC_SMALL_NBINS_MAX small bins at a time. */ - for (unsigned i = 0; i < small_nbins && gc_small_nbins < - TCACHE_GC_SMALL_NBINS_MAX; i++) { + for (unsigned i = 0; + i < small_nbins && gc_small_nbins < TCACHE_GC_SMALL_NBINS_MAX; + i++) { assert(szind_small < SC_NBINS); if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_small)) { gc_small_nbins++; @@ -568,8 +579,9 @@ tcache_gc_event(tsd_t *tsd) { } /* Flush at most TCACHE_GC_LARGE_NBINS_MAX large bins at a time. */ - for (unsigned i = SC_NBINS; i < tcache_nbins && gc_large_nbins < - TCACHE_GC_LARGE_NBINS_MAX; i++) { + for (unsigned i = SC_NBINS; + i < tcache_nbins && gc_large_nbins < TCACHE_GC_LARGE_NBINS_MAX; + i++) { assert(szind_large >= SC_NBINS && szind_large < tcache_nbins); if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_large)) { gc_large_nbins++; @@ -582,11 +594,10 @@ tcache_gc_event(tsd_t *tsd) { } void * -tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, - tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, - bool *tcache_success) { +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success) { tcache_slow_t *tcache_slow = tcache->tcache_slow; - void *ret; + void *ret; assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); @@ -596,8 +607,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, nfill = 1; } arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, - /* nfill_min */ opt_experimental_tcache_gc ? - ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); + /* nfill_min */ + opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, + /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -612,8 +624,8 @@ tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { } static void -tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, - emap_full_alloc_ctx_t *alloc_ctx) { +tcache_bin_flush_metadata_visitor( + void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { size_t *szind_sum = (size_t *)szind_sum_ctx; *szind_sum -= alloc_ctx->szind; util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); @@ -640,7 +652,6 @@ tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { - /* * This gets compiled away when config_opt_safety_checks is false. * Checks for sized deallocation bugs, failing early rather than @@ -649,16 +660,16 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, size_t szind_sum = binind * nflush; emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, &tcache_bin_flush_ptr_getter, (void *)arr, - &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, - edatas); + &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); if (config_opt_safety_checks && unlikely(szind_sum != 0)) { tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); } } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { +tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, + unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -669,7 +680,8 @@ tcache_bin_flush_impl_small(tsd_t 
*tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned tcache_binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + unsigned tcache_binshard = + tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -727,8 +739,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); unsigned cur_binshard = edata_binshard_get(cur_edata); - bin_t *cur_bin = arena_get_bin(cur_arena, binind, - cur_binshard); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); /* @@ -737,16 +748,18 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin */ flush_start++; for (unsigned i = flush_start; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; + void *ptr = ptrs->ptr[i]; edata_t *edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + assert( + (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert( + (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); if (edata_arena_ind_get(edata) == cur_arena_ind && edata_binshard_get(edata) == cur_binshard) { /* Swap the edatas. */ - emap_batch_lookup_result_t temp_edata - = item_edata[flush_start]; + emap_batch_lookup_result_t temp_edata = + item_edata[flush_start]; item_edata[flush_start] = item_edata[i]; item_edata[i] = temp_edata; /* Swap the pointers */ @@ -759,7 +772,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin /* Make sure we implemented partitioning correctly. 
*/ if (config_debug) { for (unsigned i = prev_flush_start; i < flush_start; - i++) { + i++) { edata_t *edata = item_edata[i].edata; unsigned arena_ind = edata_arena_ind_get(edata); assert(arena_ind == cur_arena_ind); @@ -768,10 +781,10 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } for (unsigned i = flush_start; i < nflush; i++) { edata_t *edata = item_edata[i].edata; - assert(edata_arena_ind_get(edata) - != cur_arena_ind + assert( + edata_arena_ind_get(edata) != cur_arena_ind || edata_binshard_get(edata) - != cur_binshard); + != cur_binshard); } } @@ -817,7 +830,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin bool home_binshard = (cur_arena == tcache_arena && cur_binshard == tcache_binshard); bool can_batch = (flush_start - prev_flush_start - <= opt_bin_info_remote_free_max_batch) + <= opt_bin_info_remote_free_max_batch) && !home_binshard && bin_is_batched; /* @@ -831,8 +844,8 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } if (can_batch && !locked) { - bin_with_batch_t *batched_bin = - (bin_with_batch_t *)cur_bin; + bin_with_batch_t *batched_bin = (bin_with_batch_t *) + cur_bin; size_t push_idx = batcher_push_begin(tsdn, &batched_bin->remote_frees, flush_start - prev_flush_start); @@ -840,19 +853,19 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (push_idx != BATCHER_NO_IDX) { batched = true; - unsigned nbatched - = flush_start - prev_flush_start; + unsigned nbatched = flush_start + - prev_flush_start; for (unsigned i = 0; i < nbatched; i++) { unsigned src_ind = prev_flush_start + i; - batched_bin->remote_free_data[ - push_idx + i].ptr - = ptrs->ptr[src_ind]; - batched_bin->remote_free_data[ - push_idx + i].slab - = item_edata[src_ind].edata; + batched_bin + ->remote_free_data[push_idx + i] + .ptr = ptrs->ptr[src_ind]; + batched_bin + 
->remote_free_data[push_idx + i] + .slab = item_edata[src_ind].edata; } - batcher_push_end(tsdn, - &batched_bin->remote_frees); + batcher_push_end( + tsdn, &batched_bin->remote_frees); } else { batch_failed = true; } @@ -887,16 +900,17 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin cache_bin->tstats.nrequests = 0; } unsigned preallocated_slabs = nflush; - unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); + unsigned ndalloc_slabs = + arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); /* Next flush objects our own objects. */ /* Init only to avoid used-uninitialized warning. */ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); for (unsigned i = prev_flush_start; i < flush_start; - i++) { - void *ptr = ptrs->ptr[i]; + i++) { + void *ptr = ptrs->ptr[i]; edata_t *edata = item_edata[i].edata; arena_dalloc_bin_locked_step(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, edata, @@ -910,16 +924,16 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (bin_is_batched) { arena_bin_flush_batch_impl(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, - dalloc_slabs, ndalloc_slabs, - &dalloc_count, &dalloc_slabs_extra); + dalloc_slabs, ndalloc_slabs, &dalloc_count, + &dalloc_slabs_extra); } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); } - arena_decay_ticks(tsdn, cur_arena, - flush_start - prev_flush_start); + arena_decay_ticks( + tsdn, cur_arena, flush_start - prev_flush_start); } /* Handle all deferred slab dalloc. 
*/ @@ -934,24 +948,24 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } if (config_stats && !merged_stats) { - /* + /* * The flush loop didn't happen to flush to this * thread's arena, so the stats didn't get merged. * Manually do so now. */ - bin_t *bin = arena_bin_choose(tsdn, tcache_arena, - binind, NULL); - malloc_mutex_lock(tsdn, &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; + malloc_mutex_unlock(tsdn, &bin->lock); } } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { +tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, + unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -1009,8 +1023,7 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(ptr != NULL && edata != NULL); if (edata_arena_ind_get(edata) == cur_arena_ind) { - large_dalloc_prep_locked(tsdn, - edata); + large_dalloc_prep_locked(tsdn, edata); } } if (!arena_is_auto(cur_arena)) { @@ -1035,8 +1048,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, - sz_index2size(binind))) { + if (large_dalloc_safety_checks( + edata, ptr, sz_index2size(binind))) { /* See the comment in isfree. 
*/ continue; } @@ -1048,8 +1061,7 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (config_stats && !merged_stats) { arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); + &tcache_arena->stats, binind, cache_bin->tstats.nrequests); cache_bin->tstats.nrequests = 0; } } @@ -1058,7 +1070,7 @@ JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { assert(ptrs != NULL && ptrs->ptr != NULL); - unsigned nflush_batch, nflushed = 0; + unsigned nflush_batch, nflushed = 0; cache_bin_ptr_array_t ptrs_batch; do { nflush_batch = nflush - nflushed; @@ -1078,11 +1090,11 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * '...' is morally equivalent, the code itself needs slight tweaks. */ if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, - &ptrs_batch, nflush_batch); + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, + binind, &ptrs_batch, nflush_batch); } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, - &ptrs_batch, nflush_batch); + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, + binind, &ptrs_batch, nflush_batch); } nflushed += nflush_batch; } while (nflushed < nflush); @@ -1117,8 +1129,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, - small); + tcache_bin_flush_impl( + tsd, tcache, cache_bin, binind, &ptrs, nflush, small); cache_bin_finish_flush(cache_bin, &ptrs, nflush); } @@ -1157,7 +1169,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * items remain unchanged -- the stashed items reside on the other end * of the stack. 
Checking the stack head and ncached to verify. */ - void *head_content = *cache_bin->stack_head; + void *head_content = *cache_bin->stack_head; cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin); @@ -1167,11 +1179,11 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); - cache_bin_init_ptr_array_for_stashed(cache_bin, binind, &ptrs, - nstashed); + cache_bin_init_ptr_array_for_stashed( + cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, - is_small); + tcache_bin_flush_impl( + tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); cache_bin_finish_flush_stashed(cache_bin); assert(cache_bin_nstashed_get_local(cache_bin) == 0); @@ -1190,8 +1202,8 @@ tcache_get_default_ncached_max(void) { } bool -tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, - cache_bin_sz_t *ncached_max) { +tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max) { if (bin_size > TCACHE_MAXCLASS_LIMIT) { return true; } @@ -1206,8 +1218,9 @@ tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, szind_t bin_ind = sz_size2index(bin_size); cache_bin_t *bin = &tcache->bins[bin_ind]; - *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? - 0: cache_bin_ncached_max_get(bin); + *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) + ? 
0 + : cache_bin_ncached_max_get(bin); return false; } @@ -1233,17 +1246,17 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache) { +tcache_arena_dissociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache) { arena_t *arena = tcache_slow->arena; assert(arena != NULL); if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); if (config_debug) { - bool in_ql = false; + bool in_ql = false; tcache_slow_t *iter; - ql_foreach(iter, &arena->tcache_ql, link) { + ql_foreach (iter, &arena->tcache_ql, link) { if (iter == tcache_slow) { in_ql = true; break; @@ -1276,8 +1289,8 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { } static void -tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem, const cache_bin_info_t *tcache_bin_info) { +tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem, + const cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -1296,23 +1309,22 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * worrying about which ones are disabled. 
*/ unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, - &cur_offset); + size_t cur_offset = 0; + cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, &cur_offset); for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { tcache_bin_fill_ctl_init(tcache_slow, i); tcache_slow->bin_refilled[i] = false; - tcache_slow->bin_flush_delay_items[i] - = tcache_gc_item_delay_compute(i); + tcache_slow->bin_flush_delay_items[i] = + tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; if (tcache_bin_info[i].ncached_max > 0) { - cache_bin_init(cache_bin, &tcache_bin_info[i], mem, - &cur_offset); + cache_bin_init( + cache_bin, &tcache_bin_info[i], mem, &cur_offset); } else { - cache_bin_init_disabled(cache_bin, - tcache_bin_info[i].ncached_max); + cache_bin_init_disabled( + cache_bin, tcache_bin_info[i].ncached_max); } } /* @@ -1323,8 +1335,8 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, */ for (unsigned i = tcache_nbins; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; - cache_bin_init_disabled(cache_bin, - tcache_bin_info[i].ncached_max); + cache_bin_init_disabled( + cache_bin, tcache_bin_info[i].ncached_max); assert(tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)); } @@ -1332,8 +1344,8 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (config_debug) { /* Sanity check that the whole stack is used. */ size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &size, &alignment); + cache_bin_info_compute_alloc( + tcache_bin_info, tcache_nbins, &size, &alignment); assert(cur_offset == size); } } @@ -1402,26 +1414,26 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. 
*/ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = tcache_get_default_ncached_max_set(i) ? - (unsigned)tcache_get_default_ncached_max()[i].ncached_max: - tcache_ncached_max_compute(i); + unsigned ncached_max = tcache_get_default_ncached_max_set(i) + ? (unsigned)tcache_get_default_ncached_max()[i].ncached_max + : tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); - cache_bin_info_init(&tcache_bin_info[i], - (cache_bin_sz_t)ncached_max); + cache_bin_info_init( + &tcache_bin_info[i], (cache_bin_sz_t)ncached_max); } } static bool -tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, - const cache_bin_info_t *tcache_bin_info) { +tsd_tcache_data_init_impl( + tsd_t *tsd, arena_t *arena, const cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); - tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); + tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &size, &alignment); + size_t size, alignment; + cache_bin_info_compute_alloc( + tcache_bin_info, tcache_nbins, &size, &alignment); void *mem; if (cache_bin_stack_use_thp()) { @@ -1450,16 +1462,16 @@ tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, - arena); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena); } else { if (arena == NULL) { arena = arena_choose(tsd, NULL); } /* This may happen if thread.tcache.enabled is used. 
*/ if (tcache_slow->arena == NULL) { - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, - tcache, arena); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena); } } assert(arena == tcache_slow->arena); @@ -1484,30 +1496,29 @@ tcache_create_explicit(tsd_t *tsd) { * the cache bins have the requested alignment. */ unsigned tcache_nbins = global_do_not_change_tcache_nbins; - size_t tcache_size, alignment; + size_t tcache_size, alignment; cache_bin_info_compute_alloc(tcache_get_default_ncached_max(), tcache_nbins, &tcache_size, &alignment); - size_t size = tcache_size + sizeof(tcache_t) - + sizeof(tcache_slow_t); + size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, - true, NULL, true, arena_get(TSDN_NULL, 0, true)); + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); - tcache_slow_t *tcache_slow = - (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); + tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + + sizeof(tcache_t)); tcache_default_settings_init(tcache_slow); - tcache_init(tsd, tcache_slow, tcache, mem, - tcache_get_default_ncached_max()); + tcache_init( + tsd, tcache_slow, tcache, mem, tcache_get_default_ncached_max()); - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, - arena_ichoose(tsd, NULL)); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); return tcache; } @@ -1525,8 +1536,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init(tsd, NULL, - tcache_get_default_ncached_max()); + tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } return false; @@ -1537,8 +1548,8 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL, - tcache_get_default_ncached_max()); + tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -1551,12 +1562,12 @@ void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); assert(tcache_max == sz_s2u(tcache_max)); - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_slow_t *tcache_slow = tcache->tcache_slow; + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; assert(tcache != NULL && tcache_slow != NULL); - bool enabled = tcache_available(tsd); + bool enabled = tcache_available(tsd); arena_t *assigned_arena JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); if (enabled) { assigned_arena = tcache_slow->arena; @@ -1587,16 +1598,16 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, do { size_t size_start, size_end; size_t ncached_max; - bool err = multi_setting_parse_next(&bin_settings_segment_cur, - &len_left, &size_start, &size_end, &ncached_max); + bool err = multi_setting_parse_next(&bin_settings_segment_cur, + &len_left, &size_start, &size_end, &ncached_max); if (err) { return true; } if (size_end > TCACHE_MAXCLASS_LIMIT) { size_end = TCACHE_MAXCLASS_LIMIT; } - if (size_start > TCACHE_MAXCLASS_LIMIT || - size_start > size_end) { + if (size_start > TCACHE_MAXCLASS_LIMIT + || size_start > size_end) { continue; } /* May get called before sz_init (during malloc_conf_init). 
*/ @@ -1606,8 +1617,8 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, ncached_max = (size_t)CACHE_BIN_NCACHED_MAX; } for (szind_t i = bin_start; i <= bin_end; i++) { - cache_bin_info_init(&tcache_bin_info[i], - (cache_bin_sz_t)ncached_max); + cache_bin_info_init( + &tcache_bin_info[i], (cache_bin_sz_t)ncached_max); if (bin_info_is_set != NULL) { bin_info_is_set[i] = true; } @@ -1618,13 +1629,12 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, } bool -tcache_bin_info_default_init(const char *bin_settings_segment_cur, - size_t len_left) { +tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left) { return tcache_bin_info_settings_parse(bin_settings_segment_cur, len_left, opt_tcache_ncached_max, opt_tcache_ncached_max_set); } - bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { assert(tcache_available(tsd)); @@ -1634,15 +1644,14 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; tcache_bin_settings_backup(tcache, tcache_bin_info); - if(tcache_bin_info_settings_parse(settings, len, tcache_bin_info, - NULL)) { + if (tcache_bin_info_settings_parse( + settings, len, tcache_bin_info, NULL)) { return true; } arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init(tsd, assigned_arena, - tcache_bin_info); + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); return false; } @@ -1698,11 +1707,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { * tsd). Manually trigger decay to avoid pathological cases. Also * include arena 0 because the tcache array is allocated from it. 
*/ - arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), - false, false); + arena_decay( + tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), false, false); - if (arena_nthreads_get(arena, false) == 0 && - !background_thread_enabled()) { + if (arena_nthreads_get(arena, false) == 0 + && !background_thread_enabled()) { /* Force purging when no threads assigned to the arena anymore. */ arena_decay(tsd_tsdn(tsd), arena, /* is_background_thread */ false, /* all */ true); @@ -1760,7 +1769,7 @@ tcaches_create_prep(tsd_t *tsd, base_t *base) { if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), base, - sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); + sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX + 1), CACHELINE); if (tcaches == NULL) { err = true; goto label_return; @@ -1851,7 +1860,7 @@ void tcaches_destroy(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; - tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); + tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); elm->next = tcaches_avail; tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); @@ -1875,7 +1884,7 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { tcache_bin_info_compute(opt_tcache_ncached_max); if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -1897,7 +1906,8 @@ tcache_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } -void tcache_assert_initialized(tcache_t *tcache) { +void +tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } @@ -1908,8 +1918,8 @@ tcache_gc_enabled(void) { /* Handles alloc and dalloc the same way */ te_base_cb_t tcache_gc_te_handler = { - .enabled = &tcache_gc_enabled, - .new_event_wait = &tcache_gc_new_event_wait, - .postponed_event_wait = &tcache_gc_postponed_event_wait, - .event_handler = 
&tcache_gc_event, + .enabled = &tcache_gc_enabled, + .new_event_wait = &tcache_gc_new_event_wait, + .postponed_event_wait = &tcache_gc_postponed_event_wait, + .event_handler = &tcache_gc_event, }; diff --git a/src/thread_event.c b/src/thread_event.c index 496c16be..c59027ed 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -16,7 +16,8 @@ te_ctx_has_active_events(te_ctx_t *ctx) { } } else { for (int i = 0; i < te_dalloc_count; ++i) { - if (te_enabled_yes == te_dalloc_handlers[i]->enabled()) { + if (te_enabled_yes + == te_dalloc_handlers[i]->enabled()) { return true; } } @@ -26,12 +27,11 @@ te_ctx_has_active_events(te_ctx_t *ctx) { static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? - te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? - tsd_te_datap_get_unsafe(tsd)->alloc_wait : - tsd_te_datap_get_unsafe(tsd)->dalloc_wait; - int count = is_alloc ? te_alloc_count : te_dalloc_count; + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers + : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait + : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? te_alloc_count : te_dalloc_count; uint64_t wait = TE_MAX_START_WAIT; @@ -86,9 +86,9 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!te_ctx_has_active_events(ctx) && last_event == 0U) || - interval == min_wait || - (interval < min_wait && interval == TE_MAX_INTERVAL)); + assert((!te_ctx_has_active_events(ctx) && last_event == 0U) + || interval == min_wait + || (interval < min_wait && interval == TE_MAX_INTERVAL)); } void @@ -151,8 +151,9 @@ te_assert_invariants_debug(tsd_t *tsd) { static void te_ctx_next_event_fast_update(te_ctx_t *ctx) { uint64_t next_event = te_ctx_next_event_get(ctx); - uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ? 
- next_event : 0U; + uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) + ? next_event + : 0U; te_ctx_next_event_fast_set(ctx, next_event_fast); } @@ -177,8 +178,7 @@ te_recompute_fast_threshold(tsd_t *tsd) { } static inline void -te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait) { +te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { /* * The next threshold based on future events can only be adjusted after * progressing the last_event counter (which is set to current). @@ -186,23 +186,22 @@ te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx)); assert(wait <= TE_MAX_START_WAIT); - uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= - TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); + uint64_t next_event = te_ctx_last_event_get(ctx) + + (wait <= TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); } void -te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait) { +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { te_adjust_thresholds_impl(tsd, ctx, wait); } static void te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? - tsd_te_datap_get_unsafe(tsd)->alloc_wait : - tsd_te_datap_get_unsafe(tsd)->dalloc_wait; - int count = is_alloc ? te_alloc_count : te_dalloc_count; + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers + : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait + : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? 
te_alloc_count : te_dalloc_count; for (int i = 0; i < count; i++) { if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = handlers[i]->new_event_wait(tsd); @@ -216,25 +215,23 @@ te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { } static inline bool -te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, - uint64_t *ev_wait, uint64_t *wait, te_base_cb_t *handler, - uint64_t new_wait) { +te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, uint64_t *ev_wait, + uint64_t *wait, te_base_cb_t *handler, uint64_t new_wait) { bool ret = false; if (*ev_wait > accumbytes) { - *ev_wait -= accumbytes; - } else if (!allow) { - *ev_wait = handler->postponed_event_wait(tsd); - } else { - ret = true; - *ev_wait = new_wait == 0 ? - handler->new_event_wait(tsd) : - new_wait; - } + *ev_wait -= accumbytes; + } else if (!allow) { + *ev_wait = handler->postponed_event_wait(tsd); + } else { + ret = true; + *ev_wait = new_wait == 0 ? handler->new_event_wait(tsd) + : new_wait; + } - assert(*ev_wait > 0); - if (*ev_wait < *wait) { - *wait = *ev_wait; - } + assert(*ev_wait > 0); + if (*ev_wait < *wait) { + *wait = *ev_wait; + } return ret; } @@ -242,32 +239,32 @@ extern uint64_t stats_interval_accum_batch; /* Return number of handlers enqueued into to_trigger array */ static inline size_t te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, - uint64_t accumbytes, bool allow, uint64_t *wait) { + uint64_t accumbytes, bool allow, uint64_t *wait) { /* * We do not loop and invoke the functions via interface because * of the perf cost. This path is relatively hot, so we sacrifice * elegance for perf. 
*/ - size_t nto_trigger = 0; + size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_tcache_gc], wait, - te_alloc_handlers[te_alloc_tcache_gc], - opt_tcache_gc_incr_bytes)) { + &waits[te_alloc_tcache_gc], wait, + te_alloc_handlers[te_alloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_tcache_gc]; } } #ifdef JEMALLOC_PROF - if (opt_prof) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_prof_sample]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_sample], wait, - te_alloc_handlers[te_alloc_prof_sample], 0)) { + if (opt_prof) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_prof_sample]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_sample], wait, + te_alloc_handlers[te_alloc_prof_sample], 0)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_sample]; } @@ -275,12 +272,12 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, #endif if (opt_stats_interval >= 0) { if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_stats_interval], - wait, - te_alloc_handlers[te_alloc_stats_interval], - stats_interval_accum_batch)) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_stats_interval]->enabled()); + &waits[te_alloc_stats_interval], wait, + te_alloc_handlers[te_alloc_stats_interval], + stats_interval_accum_batch)) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_stats_interval] + ->enabled()); to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_stats_interval]; } @@ -288,30 +285,30 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, #ifdef JEMALLOC_STATS assert(te_enabled_yes == 
te_alloc_handlers[te_alloc_peak]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, - te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { + if (te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, + te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; - } + } - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_prof_threshold]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_threshold], wait, - te_alloc_handlers[te_alloc_prof_threshold], - 1 << opt_experimental_lg_prof_threshold)) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_threshold], wait, + te_alloc_handlers[te_alloc_prof_threshold], + 1 << opt_experimental_lg_prof_threshold)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; - } + } #endif for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { - te_enabled_t status = - te_user_event_enabled(ue - te_alloc_user0, true); + te_enabled_t status = te_user_event_enabled( + ue - te_alloc_user0, true); if (status == te_enabled_not_installed) { break; } else if (status == te_enabled_yes) { if (te_update_wait(tsd, accumbytes, allow, &waits[ue], - wait, te_alloc_handlers[ue], 0)) { + wait, te_alloc_handlers[ue], 0)) { to_trigger[nto_trigger++] = te_alloc_handlers[ue]; } @@ -321,37 +318,36 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, } static inline size_t -te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbytes, - bool allow, uint64_t *wait) { - size_t nto_trigger = 0; +te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, + uint64_t accumbytes, bool allow, uint64_t *wait) { + size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_enabled_yes == - 
te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + assert(te_enabled_yes + == te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, - &waits[te_dalloc_tcache_gc], wait, - te_dalloc_handlers[te_dalloc_tcache_gc], - opt_tcache_gc_incr_bytes)) { + &waits[te_dalloc_tcache_gc], wait, + te_dalloc_handlers[te_dalloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_tcache_gc]; } - } + } #ifdef JEMALLOC_STATS assert(te_enabled_yes == te_dalloc_handlers[te_dalloc_peak]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, - te_dalloc_handlers[te_dalloc_peak], - PEAK_EVENT_WAIT)) { + if (te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, + te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; - } + } #endif for (te_dalloc_t ue = te_dalloc_user0; ue <= te_dalloc_user3; ue++) { - te_enabled_t status = - te_user_event_enabled(ue - te_dalloc_user0, false); + te_enabled_t status = te_user_event_enabled( + ue - te_dalloc_user0, false); if (status == te_enabled_not_installed) { break; } else if (status == te_enabled_yes) { if (te_update_wait(tsd, accumbytes, allow, &waits[ue], - wait, te_dalloc_handlers[ue], 0)) { + wait, te_dalloc_handlers[ue], 0)) { to_trigger[nto_trigger++] = te_dalloc_handlers[ue]; } @@ -369,26 +365,22 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { te_ctx_last_event_set(ctx, bytes_after); - bool allow_event_trigger = tsd_nominal(tsd) && - tsd_reentrancy_level_get(tsd) == 0; + bool allow_event_trigger = tsd_nominal(tsd) + && tsd_reentrancy_level_get(tsd) == 0; uint64_t wait = TE_MAX_START_WAIT; - assert((int)te_alloc_count >= (int) te_dalloc_count); + assert((int)te_alloc_count >= (int)te_dalloc_count); te_base_cb_t *to_trigger[te_alloc_count]; - size_t nto_trigger; + size_t nto_trigger; if (ctx->is_alloc) { - nto_trigger = te_update_alloc_events(tsd, 
to_trigger, - accumbytes, - allow_event_trigger, - &wait); + nto_trigger = te_update_alloc_events( + tsd, to_trigger, accumbytes, allow_event_trigger, &wait); } else { - nto_trigger = te_update_dalloc_events(tsd, to_trigger, - accumbytes, - allow_event_trigger, - &wait); + nto_trigger = te_update_dalloc_events( + tsd, to_trigger, accumbytes, allow_event_trigger, &wait); } - assert(wait <= TE_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); te_adjust_thresholds_helper(tsd, ctx, wait); te_assert_invariants(tsd); diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index f5408178..05882616 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -145,34 +145,25 @@ TE_USER_HANDLER_BINDING_IDX(3); /* Table of all the thread events. */ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #ifdef JEMALLOC_PROF - &prof_sample_te_handler, + &prof_sample_te_handler, #endif - &stats_interval_te_handler, - &tcache_gc_te_handler, + &stats_interval_te_handler, &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, - &peak_te_handler, + &prof_threshold_te_handler, &peak_te_handler, #endif - &user_alloc_handler0, - &user_alloc_handler1, - &user_alloc_handler2, - &user_alloc_handler3 -}; + &user_alloc_handler0, &user_alloc_handler1, &user_alloc_handler2, + &user_alloc_handler3}; -te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { - &tcache_gc_te_handler, +te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = {&tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &peak_te_handler, + &peak_te_handler, #endif - &user_dalloc_handler0, - &user_dalloc_handler1, - &user_dalloc_handler2, - &user_dalloc_handler3 -}; + &user_dalloc_handler0, &user_dalloc_handler1, &user_dalloc_handler2, + &user_dalloc_handler3}; static inline bool te_update_tsd(tsd_t *tsd, uint64_t new_wait, size_t ue_idx, bool is_alloc) { - bool needs_recompute = false; + bool needs_recompute = false; te_ctx_t ctx; uint64_t next, current, cur_wait; diff --git 
a/src/ticker.c b/src/ticker.c index 790b5c20..1fd6ac96 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -20,13 +20,8 @@ * The values here are computed in src/ticker.py */ -const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = { - 254, 211, 187, 169, 156, 144, 135, 127, - 120, 113, 107, 102, 97, 93, 89, 85, - 81, 77, 74, 71, 68, 65, 62, 60, - 57, 55, 53, 50, 48, 46, 44, 42, - 40, 39, 37, 35, 33, 32, 30, 29, - 27, 26, 24, 23, 21, 20, 19, 18, - 16, 15, 14, 13, 12, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 -}; +const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {254, 211, 187, 169, + 156, 144, 135, 127, 120, 113, 107, 102, 97, 93, 89, 85, 81, 77, 74, 71, 68, + 65, 62, 60, 57, 55, 53, 50, 48, 46, 44, 42, 40, 39, 37, 35, 33, 32, 30, 29, + 27, 26, 24, 23, 21, 20, 19, 18, 16, 15, 14, 13, 12, 10, 9, 8, 7, 6, 5, 4, 3, + 2, 1, 0}; diff --git a/src/tsd.c b/src/tsd.c index 0a2ccc59..20042c2d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -20,19 +20,20 @@ bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; -bool tsd_booted = false; +bool tsd_booted = false; #elif (defined(_WIN32)) -#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -DWORD tsd_tsd; +# if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) +DWORD tsd_tsd; tsd_wrapper_t tsd_boot_wrapper = {TSD_INITIALIZER, false}; -#else -JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls = { TSD_INITIALIZER, false }; -#endif +# else +JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) +tsd_wrapper_tls = {TSD_INITIALIZER, false}; +# endif bool tsd_booted = false; -#if JEMALLOC_WIN32_TLSGETVALUE2 -TGV2 tls_get_value2 = NULL; +# if JEMALLOC_WIN32_TLSGETVALUE2 +TGV2 tls_get_value2 = NULL; HMODULE tgv2_mod = NULL; -#endif +# endif #else /* @@ -45,17 +46,12 @@ struct tsd_init_head_s { malloc_mutex_t lock; }; -pthread_key_t tsd_tsd; -tsd_init_head_t tsd_init_head = { - ql_head_initializer(blocks), - MALLOC_MUTEX_INITIALIZER -}; +pthread_key_t 
tsd_tsd; +tsd_init_head_t tsd_init_head = { + ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER}; -tsd_wrapper_t tsd_boot_wrapper = { - false, - TSD_INITIALIZER -}; -bool tsd_booted = false; +tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +bool tsd_booted = false; #endif JEMALLOC_DIAGNOSTIC_POP @@ -64,7 +60,7 @@ JEMALLOC_DIAGNOSTIC_POP /* A list of all the tsds in the nominal state. */ typedef ql_head(tsd_t) tsd_list_t; -static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); +static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); static malloc_mutex_t tsd_nominal_tsds_lock; /* How many slow-path-enabling features are turned on. */ @@ -73,13 +69,13 @@ static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); static bool tsd_in_nominal_list(tsd_t *tsd) { tsd_t *tsd_list; - bool found = false; + bool found = false; /* * We don't know that tsd is nominal; it might not be safe to get data * out of it here. */ malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); - ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { + ql_foreach (tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { if (tsd == tsd_list) { found = true; break; @@ -117,7 +113,7 @@ tsd_force_recompute(tsdn_t *tsdn) { atomic_fence(ATOMIC_RELEASE); malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; - ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { + ql_foreach (remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, @@ -143,7 +139,8 @@ tsd_global_slow_inc(tsdn_t *tsdn) { tsd_force_recompute(tsdn); } -void tsd_global_slow_dec(tsdn_t *tsdn) { +void +tsd_global_slow_dec(tsdn_t *tsdn) { atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); /* See the note in ..._inc(). 
*/ tsd_force_recompute(tsdn); @@ -180,8 +177,8 @@ tsd_slow_update(tsd_t *tsd) { uint8_t old_state; do { uint8_t new_state = tsd_state_compute(tsd); - old_state = tsd_atomic_exchange(&tsd->state, new_state, - ATOMIC_ACQUIRE); + old_state = tsd_atomic_exchange( + &tsd->state, new_state, ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); te_recompute_fast_threshold(tsd); @@ -211,8 +208,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { assert(tsd_in_nominal_list(tsd)); if (new_state > tsd_state_nominal_max) { tsd_remove_nominal(tsd); - tsd_atomic_store(&tsd->state, new_state, - ATOMIC_RELAXED); + tsd_atomic_store( + &tsd->state, new_state, ATOMIC_RELAXED); } else { /* * This is the tricky case. We're transitioning from @@ -235,8 +232,7 @@ tsd_prng_state_init(tsd_t *tsd) { * cost of test repeatability. For debug builds, instead use a * deterministic seed. */ - *tsd_prng_statep_get(tsd) = config_debug ? 0 : - (uint64_t)(uintptr_t)tsd; + *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; } static bool @@ -264,8 +260,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd_state_get(tsd) == tsd_state_reincarnated || - tsd_state_get(tsd) == tsd_state_minimal_initialized); + assert(tsd_state_get(tsd) == tsd_state_reincarnated + || tsd_state_get(tsd) == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). 
@@ -358,15 +354,15 @@ malloc_tsd_dalloc(void *wrapper) { } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -static unsigned ncleanups; +static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_EXPORT -#endif +# endif void _malloc_thread_cleanup(void) { - bool pending[MALLOC_TSD_CLEANUPS_MAX], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) { @@ -386,9 +382,9 @@ _malloc_thread_cleanup(void) { } while (again); } -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_EXPORT -#endif +# endif void _malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); @@ -446,7 +442,7 @@ tsd_cleanup(void *arg) { } #ifdef JEMALLOC_JET test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); - int *data = tsd_test_datap_get_unsafe(tsd); + int *data = tsd_test_datap_get_unsafe(tsd); if (test_callback != NULL) { test_callback(data); } @@ -461,7 +457,7 @@ malloc_tsd_boot0(void) { ncleanups = 0; #endif if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", - WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { return NULL; } if (tsd_boot0()) { @@ -483,11 +479,11 @@ malloc_tsd_boot1(void) { static BOOL WINAPI _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { switch (fdwReason) { -#ifdef JEMALLOC_LAZY_LOCK +# ifdef JEMALLOC_LAZY_LOCK case DLL_THREAD_ATTACH: isthreaded = true; break; -#endif +# endif case DLL_THREAD_DETACH: _malloc_thread_cleanup(); break; @@ -502,35 +498,36 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { * hooked "read". We won't read for the rest of the file, so we can get away * with unhooking. 
*/ -#ifdef read -# undef read +# ifdef read +# undef read +# endif + +# ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment( \ + linker, "/INCLUDE:" STRINGIFY(tls_callback)) +# endif +# pragma section(".CRT$XLY", long, read) +# endif +JEMALLOC_SECTION(".CRT$XLY") +JEMALLOC_ATTR(used) BOOL(WINAPI *const tls_callback)( + HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif -#ifdef _MSC_VER -# ifdef _M_IX86 -# pragma comment(linker, "/INCLUDE:__tls_used") -# pragma comment(linker, "/INCLUDE:_tls_callback") -# else -# pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) -# endif -# pragma section(".CRT$XLY",long,read) -#endif -JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, - DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; -#endif - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) \ + && !defined(_WIN32)) void * tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { - pthread_t self = pthread_self(); + pthread_t self = pthread_self(); tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ malloc_mutex_lock(TSDN_NULL, &head->lock); - ql_foreach(iter, &head->blocks, link) { + ql_foreach (iter, &head->blocks, link) { if (iter->thread == self) { malloc_mutex_unlock(TSDN_NULL, &head->lock); return iter->data; diff --git a/src/util.c b/src/util.c index b73848fb..1bcf4fee 100644 --- a/src/util.c +++ b/src/util.c @@ -8,8 +8,8 @@ bool multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, size_t *key_start, size_t *key_end, size_t *value) { const char *cur = *setting_segment_cur; - char *end; - uintmax_t um; + char *end; + uintmax_t um; set_errno(0); @@ -46,4 +46,3 @@ multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, return false; } - diff --git a/src/witness.c b/src/witness.c index 4474af04..940b1eae 100644 --- a/src/witness.c +++ b/src/witness.c @@ -26,8 +26,8 @@ witness_print_witness(witness_t *w, unsigned n) { static void witness_print_witnesses(const witness_list_t *witnesses) { witness_t *w, *last = NULL; - unsigned n = 0; - ql_foreach(w, witnesses, link) { + unsigned n = 0; + ql_foreach (w, witnesses, link) { if (last != NULL && w->rank > last->rank) { assert(w->name != last->name); witness_print_witness(last, n); @@ -45,8 +45,8 @@ witness_print_witnesses(const witness_list_t *witnesses) { } static void -witness_lock_error_impl(const witness_list_t *witnesses, - const witness_t *witness) { +witness_lock_error_impl( + const witness_list_t *witnesses, const witness_t *witness) { malloc_printf(": Lock rank order reversal:"); witness_print_witnesses(witnesses); malloc_printf(" %s(%u)\n", witness->name, witness->rank); @@ -56,8 +56,8 @@ witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl; static void witness_owner_error_impl(const witness_t *witness) { - malloc_printf(": Should own %s(%u)\n", witness->name, - witness->rank); + malloc_printf( + ": Should own %s(%u)\n", witness->name, witness->rank); abort(); } witness_owner_error_t *JET_MUTABLE witness_owner_error = @@ -76,7 
+76,7 @@ static void witness_depth_error_impl(const witness_list_t *witnesses, witness_rank_t rank_inclusive, unsigned depth) { malloc_printf(": Should own %u lock%s of rank >= %u:", depth, - (depth != 1) ? "s" : "", rank_inclusive); + (depth != 1) ? "s" : "", rank_inclusive); witness_print_witnesses(witnesses); malloc_printf("\n"); abort(); diff --git a/src/zone.c b/src/zone.c index 23dfdd04..e09de4b8 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/assert.h" #ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." +# error "This source file is for zones on Darwin (OS X)." #endif /* Definitions of the following structs in malloc/malloc.h might be too old @@ -22,10 +22,11 @@ typedef struct _malloc_zone_t { void *(*realloc)(struct _malloc_zone_t *, void *, size_t); void (*destroy)(struct _malloc_zone_t *); const char *zone_name; - unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned); + unsigned (*batch_malloc)( + struct _malloc_zone_t *, size_t, void **, unsigned); void (*batch_free)(struct _malloc_zone_t *, void **, unsigned); struct malloc_introspection_t *introspect; - unsigned version; + unsigned version; void *(*memalign)(struct _malloc_zone_t *, size_t, size_t); void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t); size_t (*pressure_relief)(struct _malloc_zone_t *, size_t); @@ -33,22 +34,24 @@ typedef struct _malloc_zone_t { typedef struct { vm_address_t address; - vm_size_t size; + vm_size_t size; } vm_range_t; typedef struct malloc_statistics_t { unsigned blocks_in_use; - size_t size_in_use; - size_t max_size_in_use; - size_t size_allocated; + size_t size_in_use; + size_t max_size_in_use; + size_t size_allocated; } malloc_statistics_t; typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **); -typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned); +typedef void vm_range_recorder_t( + task_t, void *, 
unsigned type, vm_range_t *, unsigned); typedef struct malloc_introspection_t { - kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t); + kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, + memory_reader_t, vm_range_recorder_t); size_t (*good_size)(malloc_zone_t *, size_t); boolean_t (*check)(malloc_zone_t *); void (*print)(malloc_zone_t *, boolean_t); @@ -61,14 +64,16 @@ typedef struct malloc_introspection_t { boolean_t (*disable_discharge_checking)(malloc_zone_t *); void (*discharge)(malloc_zone_t *, void *); #ifdef __BLOCKS__ - void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *)); + void (*enumerate_discharged_pointers)( + malloc_zone_t *, void (^)(void *, void *)); #else void *enumerate_unavailable_without_blocks; #endif void (*reinit_lock)(malloc_zone_t *); } malloc_introspection_t; -extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *); +extern kern_return_t malloc_get_all_zones( + task_t, memory_reader_t, vm_address_t **, unsigned *); extern malloc_zone_t *malloc_default_zone(void); @@ -81,48 +86,46 @@ extern void malloc_zone_unregister(malloc_zone_t *zone); * We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) -JEMALLOC_ATTR(weak_import); + JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t *default_zone, *purgeable_zone; -static malloc_zone_t jemalloc_zone; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; -static pid_t zone_force_lock_pid = -1; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static size_t zone_size(malloc_zone_t *zone, const void *ptr); -static void *zone_malloc(malloc_zone_t *zone, size_t size); -static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); -static void *zone_valloc(malloc_zone_t *zone, size_t size); -static void zone_free(malloc_zone_t *zone, void *ptr); -static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -static void *zone_memalign(malloc_zone_t *zone, size_t alignment, - size_t size); -static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -static void zone_destroy(malloc_zone_t *zone); -static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, +static size_t zone_size(malloc_zone_t *zone, const void *ptr); +static void *zone_malloc(malloc_zone_t *zone, size_t size); +static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); +static void *zone_valloc(malloc_zone_t *zone, size_t size); +static void zone_free(malloc_zone_t *zone, void *ptr); +static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +static void *zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size); +static void zone_free_definite_size( + malloc_zone_t *zone, void *ptr, size_t size); +static void zone_destroy(malloc_zone_t *zone); +static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, unsigned num_requested); -static void zone_batch_free(struct _malloc_zone_t *zone, - void **to_be_freed, unsigned num_to_be_freed); -static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); -static size_t zone_good_size(malloc_zone_t *zone, size_t size); -static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask, - vm_address_t zone_address, memory_reader_t reader, +static void zone_batch_free( + struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed); +static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); +static size_t 
zone_good_size(malloc_zone_t *zone, size_t size); +static kern_return_t zone_enumerator(task_t task, void *data, + unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder); -static boolean_t zone_check(malloc_zone_t *zone); -static void zone_print(malloc_zone_t *zone, boolean_t verbose); -static void zone_log(malloc_zone_t *zone, void *address); -static void zone_force_lock(malloc_zone_t *zone); -static void zone_force_unlock(malloc_zone_t *zone); -static void zone_statistics(malloc_zone_t *zone, - malloc_statistics_t *stats); -static boolean_t zone_locked(malloc_zone_t *zone); -static void zone_reinit_lock(malloc_zone_t *zone); +static boolean_t zone_check(malloc_zone_t *zone); +static void zone_print(malloc_zone_t *zone, boolean_t verbose); +static void zone_log(malloc_zone_t *zone, void *address); +static void zone_force_lock(malloc_zone_t *zone); +static void zone_force_unlock(malloc_zone_t *zone); +static void zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats); +static boolean_t zone_locked(malloc_zone_t *zone); +static void zone_reinit_lock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -225,8 +228,8 @@ zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, } static void -zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, - unsigned num_to_be_freed) { +zone_batch_free( + struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed) { unsigned i; for (i = 0; i < num_to_be_freed; i++) { @@ -261,12 +264,10 @@ zone_check(malloc_zone_t *zone) { } static void -zone_print(malloc_zone_t *zone, boolean_t verbose) { -} +zone_print(malloc_zone_t *zone, boolean_t verbose) {} static void -zone_log(malloc_zone_t *zone, void *address) { -} +zone_log(malloc_zone_t *zone, void *address) {} static void zone_force_lock(malloc_zone_t *zone) { @@ -369,7 +370,7 @@ zone_init(void) { static malloc_zone_t * 
zone_default_get(void) { malloc_zone_t **zones = NULL; - unsigned int num_zones = 0; + unsigned int num_zones = 0; /* * On OSX 10.12, malloc_default_zone returns a special zone that is not @@ -380,8 +381,9 @@ zone_default_get(void) { * zone is the default. So get the list of zones to get the first one, * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, - (vm_address_t**)&zones, &num_zones)) { + if (KERN_SUCCESS + != malloc_get_all_zones( + 0, NULL, (vm_address_t **)&zones, &num_zones)) { /* * Reset the value in case the failure happened after it was * set. @@ -441,8 +443,8 @@ zone_register(void) { * register jemalloc's. */ default_zone = zone_default_get(); - if (!default_zone->zone_name || strcmp(default_zone->zone_name, - "DefaultMallocZone") != 0) { + if (!default_zone->zone_name + || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { return; } @@ -457,8 +459,9 @@ zone_register(void) { * to check for the existence of malloc_default_purgeable_zone() at * run time. */ - purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : - malloc_default_purgeable_zone(); + purgeable_zone = (malloc_default_purgeable_zone == NULL) + ? NULL + : malloc_default_purgeable_zone(); /* Register the custom zone. At this point it won't be the default. 
*/ zone_init(); diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index a96ca942..e4bf7942 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -46,15 +46,15 @@ do_allocs(size_t sz, size_t cnt, bool do_frees) { int main(void) { size_t lg_prof_sample_local = 19; - int err = mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); + int err = mallctl("prof.reset", NULL, NULL, + (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); assert(err == 0); prof_backtrace_hook_set(mock_backtrace); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); - do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); + do_allocs(32 * 1024 * 1024, 16, /* do_frees */ true); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); - do_allocs(32 * 1024* 1024, 16, /* do_frees */ false); + do_allocs(32 * 1024 * 1024, 16, /* do_frees */ false); return 0; } diff --git a/test/analyze/rand.c b/test/analyze/rand.c index bb20b06e..4c7e18c7 100644 --- a/test/analyze/rand.c +++ b/test/analyze/rand.c @@ -72,13 +72,13 @@ print_buckets(const size_t buckets[], const size_t means[], if (buckets[i] + stddevs[i] <= means[i]) { malloc_write(" "); for (size_t t = means[i] - buckets[i]; t >= stddevs[i]; - t -= stddevs[i]) { + t -= stddevs[i]) { malloc_write("-"); } } else if (buckets[i] >= means[i] + stddevs[i]) { malloc_write(" "); for (size_t t = buckets[i] - means[i]; t >= stddevs[i]; - t -= stddevs[i]) { + t -= stddevs[i]) { malloc_write("+"); } } @@ -93,8 +93,8 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], for (size_t i = 1; i <= 3; ++i) { malloc_printf("round %zu\n", i); fill(buckets, n_bucket, 0); - collect_buckets(gen, opaque, buckets, n_bucket, - lg_bucket_width, n_iter); + collect_buckets( + gen, opaque, buckets, n_bucket, lg_bucket_width, n_iter); print_buckets(buckets, means, stddevs, n_bucket); } } @@ -108,7 +108,7 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], 
typedef struct uniform_gen_arg_s uniform_gen_arg_t; struct uniform_gen_arg_s { - uint64_t state; + uint64_t state; const unsigned lg_range; }; @@ -131,8 +131,10 @@ TEST_BEGIN(test_uniform) { * integers, and that the minimal bucket mean is at least * MIN_BUCKET_MEAN. */ - const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL( - MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2); + const size_t q = 1 << QUOTIENT_CEIL( + LG_CEIL(QUOTIENT_CEIL( + MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), + 2); const size_t stddev = (N_BUCKET - 1) * q; const size_t mean = N_BUCKET * stddev * q; const size_t n_iter = N_BUCKET * mean; @@ -142,14 +144,14 @@ TEST_BEGIN(test_uniform) { size_t stddevs[N_BUCKET]; fill(stddevs, N_BUCKET, stddev); - uniform_gen_arg_t arg = {(uint64_t)(uintptr_t)&lg_range_test, - lg_range_test}; + uniform_gen_arg_t arg = { + (uint64_t)(uintptr_t)&lg_range_test, lg_range_test}; size_t buckets[N_BUCKET]; assert_zu_ge(lg_range_test, LG_N_BUCKET, ""); const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET; - bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, - N_BUCKET, lg_bucket_width, n_iter); + bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, N_BUCKET, + lg_bucket_width, n_iter); #undef LG_N_BUCKET #undef N_BUCKET @@ -168,8 +170,8 @@ TEST_END * comments in test_prof_sample for explanations for n_divide. */ static double -fill_geometric_proportions(double proportions[], const size_t n_bucket, - const size_t n_divide) { +fill_geometric_proportions( + double proportions[], const size_t n_bucket, const size_t n_divide) { assert(n_bucket > 0); assert(n_divide > 0); double x = 1.; @@ -220,12 +222,12 @@ TEST_BEGIN(test_prof_sample) { #ifdef JEMALLOC_PROF /* Number of divisions within [0, mean). */ -#define LG_N_DIVIDE 3 -#define N_DIVIDE (1 << LG_N_DIVIDE) +# define LG_N_DIVIDE 3 +# define N_DIVIDE (1 << LG_N_DIVIDE) /* Coverage of buckets in terms of multiples of mean. 
*/ -#define LG_N_MULTIPLY 2 -#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) +# define LG_N_MULTIPLY 2 +# define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) test_skip_if(!opt_prof); @@ -233,14 +235,15 @@ TEST_BEGIN(test_prof_sample) { size_t lg_prof_sample_orig = lg_prof_sample; assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_test, - sizeof(size_t)), 0, ""); + sizeof(size_t)), + 0, ""); malloc_printf("lg_prof_sample = %zu\n", lg_prof_sample_test); - double proportions[N_GEO_BUCKET + 1]; - const double min_proportion = fill_geometric_proportions(proportions, - N_GEO_BUCKET + 1, N_DIVIDE); - const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN / - min_proportion); + double proportions[N_GEO_BUCKET + 1]; + const double min_proportion = fill_geometric_proportions( + proportions, N_GEO_BUCKET + 1, N_DIVIDE); + const size_t n_iter = round_to_nearest( + MIN_BUCKET_MEAN / min_proportion); size_t means[N_GEO_BUCKET + 1]; size_t stddevs[N_GEO_BUCKET + 1]; fill_references(means, stddevs, proportions, N_GEO_BUCKET + 1, n_iter); @@ -255,12 +258,13 @@ TEST_BEGIN(test_prof_sample) { N_GEO_BUCKET + 1, lg_bucket_width, n_iter); assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_orig, - sizeof(size_t)), 0, ""); + sizeof(size_t)), + 0, ""); -#undef LG_N_DIVIDE -#undef N_DIVIDE -#undef LG_N_MULTIPLY -#undef N_GEO_BUCKET +# undef LG_N_DIVIDE +# undef N_DIVIDE +# undef LG_N_MULTIPLY +# undef N_GEO_BUCKET #endif /* JEMALLOC_PROF */ } @@ -270,7 +274,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_uniform, - test_prof_sample); + return test_no_reentrancy(test_uniform, test_prof_sample); } diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index cfb5ce51..cc6c3806 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -11,9 +11,9 @@ static void do_print(const char *name, size_t sz_bytes) { - const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", - "ZB"}; - size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); + const char 
*sizes[] = { + "bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"}; + size_t sizes_max = sizeof(sizes) / sizeof(sizes[0]); size_t ind = 0; double sz = sz_bytes; @@ -30,8 +30,7 @@ do_print(const char *name, size_t sz_bytes) { int main(void) { -#define P(type) \ - do_print(#type, sizeof(type)) +#define P(type) do_print(#type, sizeof(type)) P(arena_t); P(arena_stats_t); P(base_t); diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h index a1885dbf..d6a85ad1 100644 --- a/test/include/test/SFMT-alti.h +++ b/test/include/test/SFMT-alti.h @@ -61,58 +61,59 @@ * @return output */ JEMALLOC_ALWAYS_INLINE -vector unsigned int vec_recursion(vector unsigned int a, - vector unsigned int b, - vector unsigned int c, - vector unsigned int d) { - - const vector unsigned int sl1 = ALTI_SL1; - const vector unsigned int sr1 = ALTI_SR1; +vector unsigned int +vec_recursion(vector unsigned int a, vector unsigned int b, + vector unsigned int c, vector unsigned int d) { + const vector unsigned int sl1 = ALTI_SL1; + const vector unsigned int sr1 = ALTI_SR1; #ifdef ONLY64 - const vector unsigned int mask = ALTI_MSK64; - const vector unsigned char perm_sl = ALTI_SL2_PERM64; - const vector unsigned char perm_sr = ALTI_SR2_PERM64; + const vector unsigned int mask = ALTI_MSK64; + const vector unsigned char perm_sl = ALTI_SL2_PERM64; + const vector unsigned char perm_sr = ALTI_SR2_PERM64; #else - const vector unsigned int mask = ALTI_MSK; - const vector unsigned char perm_sl = ALTI_SL2_PERM; - const vector unsigned char perm_sr = ALTI_SR2_PERM; + const vector unsigned int mask = ALTI_MSK; + const vector unsigned char perm_sl = ALTI_SL2_PERM; + const vector unsigned char perm_sr = ALTI_SR2_PERM; #endif - vector unsigned int v, w, x, y, z; - x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); - v = a; - y = vec_sr(b, sr1); - z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); - w = vec_sl(d, sl1); - z = vec_xor(z, w); - y = vec_and(y, mask); - v = vec_xor(v, x); - z = 
vec_xor(z, y); - z = vec_xor(z, v); - return z; + vector unsigned int v, w, x, y, z; + x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); + v = a; + y = vec_sr(b, sr1); + z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); + w = vec_sl(d, sl1); + z = vec_xor(z, w); + y = vec_and(y, mask); + v = vec_xor(v, x); + z = vec_xor(z, y); + z = vec_xor(z, v); + return z; } /** * This function fills the internal state array with pseudorandom * integers. */ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - vector unsigned int r, r1, r2; +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + vector unsigned int r, r1, r2; - r1 = ctx->sfmt[N - 2].s; - r2 = ctx->sfmt[N - 1].s; - for (i = 0; i < N - POS1; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); - ctx->sfmt[i].s = r; - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2); - ctx->sfmt[i].s = r; - r1 = r2; - r2 = r; - } + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } } /** @@ -122,50 +123,57 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - vector unsigned int r, r1, r2; +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + vector unsigned int r, r1, r2; - r1 = ctx->sfmt[N - 2].s; - r2 = ctx->sfmt[N - 1].s; - for (i = 0; i < N - POS1; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = vec_recursion(ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - /* main loop */ - for (; i < size - N; i++) { - r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - for (j = 0; j < 2 * N - size; j++) { - ctx->sfmt[j].s = array[j + size - N].s; - } - for (; i < size; i++) { - r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - ctx->sfmt[j++].s = r; - r1 = r2; - r2 = r; - } + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion( + ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = vec_recursion( + array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j].s = array[j + size - N].s; + } + for (; i < size; i++) { + r = vec_recursion( + array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + ctx->sfmt[j++].s = r; + r1 = r2; + r2 = r; + } } #ifndef ONLY64 -#if defined(__APPLE__) -#define ALTI_SWAP (vector unsigned char) \ - (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) -#else -#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} -#endif +# if defined(__APPLE__) +# define 
ALTI_SWAP \ + (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, \ + 14, 15, 8, 9, 10, 11) +# else +# define ALTI_SWAP \ + { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 } +# endif /** * This function swaps high and low 32-bit of 64-bit integers in user * specified array. @@ -173,13 +181,15 @@ static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { * @param array an 128-bit array to be swaped. * @param size size of 128-bit array. */ -static inline void swap(w128_t *array, int size) { - int i; - const vector unsigned char perm = ALTI_SWAP; +static inline void +swap(w128_t *array, int size) { + int i; + const vector unsigned char perm = ALTI_SWAP; - for (i = 0; i < size; i++) { - array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm); - } + for (i = 0; i < size; i++) { + array[i].s = vec_perm( + array[i].s, (vector unsigned int)perm, perm); + } } #endif diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h index 6730adf8..4ff4316f 100644 --- a/test/include/test/SFMT-params.h +++ b/test/include/test/SFMT-params.h @@ -37,10 +37,10 @@ #define SFMT_PARAMS_H #if !defined(MEXP) -#ifdef __GNUC__ - #warning "MEXP is not defined. I assume MEXP is 19937." -#endif - #define MEXP 19937 +# ifdef __GNUC__ +# warning "MEXP is not defined. I assume MEXP is 19937." 
+# endif +# define MEXP 19937 #endif /*----------------- BASIC DEFINITIONS @@ -100,32 +100,32 @@ */ #if MEXP == 607 - #include "test/SFMT-params607.h" +# include "test/SFMT-params607.h" #elif MEXP == 1279 - #include "test/SFMT-params1279.h" +# include "test/SFMT-params1279.h" #elif MEXP == 2281 - #include "test/SFMT-params2281.h" +# include "test/SFMT-params2281.h" #elif MEXP == 4253 - #include "test/SFMT-params4253.h" +# include "test/SFMT-params4253.h" #elif MEXP == 11213 - #include "test/SFMT-params11213.h" +# include "test/SFMT-params11213.h" #elif MEXP == 19937 - #include "test/SFMT-params19937.h" +# include "test/SFMT-params19937.h" #elif MEXP == 44497 - #include "test/SFMT-params44497.h" +# include "test/SFMT-params44497.h" #elif MEXP == 86243 - #include "test/SFMT-params86243.h" +# include "test/SFMT-params86243.h" #elif MEXP == 132049 - #include "test/SFMT-params132049.h" +# include "test/SFMT-params132049.h" #elif MEXP == 216091 - #include "test/SFMT-params216091.h" +# include "test/SFMT-params216091.h" #else -#ifdef __GNUC__ - #error "MEXP is not valid." - #undef MEXP -#else - #undef MEXP -#endif +# ifdef __GNUC__ +# error "MEXP is not valid." 
+# undef MEXP +# else +# undef MEXP +# endif #endif diff --git a/test/include/test/SFMT-params11213.h b/test/include/test/SFMT-params11213.h index 2994bd21..d2ab5b7c 100644 --- a/test/include/test/SFMT-params11213.h +++ b/test/include/test/SFMT-params11213.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS11213_H #define SFMT_PARAMS11213_H -#define POS1 68 -#define SL1 14 -#define SL2 3 -#define SR1 7 -#define SR2 3 -#define MSK1 0xeffff7fbU -#define MSK2 0xffffffefU -#define MSK3 0xdfdfbfffU -#define MSK4 0x7fffdbfdU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xe8148000U -#define PARITY4 0xd0c7afa3U - +#define POS1 68 +#define SL1 14 +#define SL2 3 +#define SR1 7 +#define SR2 3 +#define MSK1 0xeffff7fbU +#define MSK2 0xffffffefU +#define MSK3 0xdfdfbfffU +#define MSK4 0x7fffdbfdU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xe8148000U +#define PARITY4 0xd0c7afa3U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) 
*/ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd" #endif /* SFMT_PARAMS11213_H */ diff --git a/test/include/test/SFMT-params1279.h b/test/include/test/SFMT-params1279.h index d7959f98..1be5c01d 100644 --- a/test/include/test/SFMT-params1279.h +++ b/test/include/test/SFMT-params1279.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS1279_H #define SFMT_PARAMS1279_H -#define POS1 7 -#define SL1 14 -#define SL2 3 -#define SR1 5 -#define SR2 1 -#define MSK1 0xf7fefffdU -#define MSK2 0x7fefcfffU -#define MSK3 0xaff3ef3fU -#define MSK4 0xb5ffff7fU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x20000000U - +#define POS1 7 +#define SL1 14 +#define SL2 3 +#define SR1 5 +#define SR2 1 +#define MSK1 0xf7fefffdU +#define MSK2 0x7fefcfffU +#define MSK3 0xaff3ef3fU +#define MSK4 0xb5ffff7fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x20000000U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f" #endif /* SFMT_PARAMS1279_H */ diff --git a/test/include/test/SFMT-params132049.h b/test/include/test/SFMT-params132049.h index a1dcec39..1002614b 100644 --- a/test/include/test/SFMT-params132049.h +++ b/test/include/test/SFMT-params132049.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS132049_H #define SFMT_PARAMS132049_H -#define POS1 110 -#define SL1 19 -#define SL2 1 -#define SR1 21 -#define SR2 1 -#define MSK1 0xffffbb5fU -#define MSK2 0xfb6ebf95U -#define MSK3 0xfffefffaU -#define MSK4 0xcff77fffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xcb520000U -#define PARITY4 0xc7e91c7dU - +#define POS1 110 +#define SL1 19 +#define SL2 1 +#define SR1 21 +#define SR2 1 +#define MSK1 0xffffbb5fU +#define MSK2 0xfb6ebf95U +#define MSK3 0xfffefffaU +#define MSK4 0xcff77fffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xcb520000U +#define PARITY4 0xc7e91c7dU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff" #endif /* SFMT_PARAMS132049_H */ diff --git a/test/include/test/SFMT-params19937.h b/test/include/test/SFMT-params19937.h index fb92b4c9..71df2713 100644 --- a/test/include/test/SFMT-params19937.h +++ b/test/include/test/SFMT-params19937.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS19937_H #define SFMT_PARAMS19937_H -#define POS1 122 -#define SL1 18 -#define SL2 1 -#define SR1 11 -#define SR2 1 -#define MSK1 0xdfffffefU -#define MSK2 0xddfecb7fU -#define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x13c9e684U - +#define POS1 122 +#define SL1 18 +#define SL2 1 +#define SR1 11 +#define SR2 1 +#define MSK1 0xdfffffefU +#define MSK2 0xddfecb7fU +#define MSK3 0xbffaffffU +#define MSK4 0xbffffff6U +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x13c9e684U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" #endif /* SFMT_PARAMS19937_H */ diff --git a/test/include/test/SFMT-params216091.h b/test/include/test/SFMT-params216091.h index 125ce282..d2d240e2 100644 --- a/test/include/test/SFMT-params216091.h +++ b/test/include/test/SFMT-params216091.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS216091_H #define SFMT_PARAMS216091_H -#define POS1 627 -#define SL1 11 -#define SL2 3 -#define SR1 10 -#define SR2 1 -#define MSK1 0xbff7bff7U -#define MSK2 0xbfffffffU -#define MSK3 0xbffffa7fU -#define MSK4 0xffddfbfbU -#define PARITY1 0xf8000001U -#define PARITY2 0x89e80709U -#define PARITY3 0x3bd2b64bU -#define PARITY4 0x0c64b1e4U - +#define POS1 627 +#define SL1 11 +#define SL2 3 +#define SR1 10 +#define SR2 1 +#define MSK1 0xbff7bff7U +#define MSK2 0xbfffffffU +#define MSK3 0xbffffa7fU +#define MSK4 0xffddfbfbU +#define PARITY1 0xf8000001U +#define PARITY2 0x89e80709U +#define PARITY3 0x3bd2b64bU +#define PARITY4 0x0c64b1e4U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" #endif /* SFMT_PARAMS216091_H */ diff --git a/test/include/test/SFMT-params2281.h b/test/include/test/SFMT-params2281.h index 0ef85c40..97b8de68 100644 --- a/test/include/test/SFMT-params2281.h +++ b/test/include/test/SFMT-params2281.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS2281_H #define SFMT_PARAMS2281_H -#define POS1 12 -#define SL1 19 -#define SL2 1 -#define SR1 5 -#define SR2 1 -#define MSK1 0xbff7ffbfU -#define MSK2 0xfdfffffeU -#define MSK3 0xf7ffef7fU -#define MSK4 0xf2f7cbbfU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x41dfa600U - +#define POS1 12 +#define SL1 19 +#define SL2 1 +#define SR1 5 +#define SR2 1 +#define MSK1 0xbff7ffbfU +#define MSK2 0xfdfffffeU +#define MSK3 0xf7ffef7fU +#define MSK4 0xf2f7cbbfU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x41dfa600U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf" #endif /* SFMT_PARAMS2281_H */ diff --git a/test/include/test/SFMT-params4253.h b/test/include/test/SFMT-params4253.h index 9f07bc67..7e51edd8 100644 --- a/test/include/test/SFMT-params4253.h +++ b/test/include/test/SFMT-params4253.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS4253_H #define SFMT_PARAMS4253_H -#define POS1 17 -#define SL1 20 -#define SL2 1 -#define SR1 7 -#define SR2 1 -#define MSK1 0x9f7bffffU -#define MSK2 0x9fffff5fU -#define MSK3 0x3efffffbU -#define MSK4 0xfffff7bbU -#define PARITY1 0xa8000001U -#define PARITY2 0xaf5390a3U -#define PARITY3 0xb740b3f8U -#define PARITY4 0x6c11486dU - +#define POS1 17 +#define SL1 20 +#define SL2 1 +#define SR1 7 +#define SR2 1 +#define MSK1 0x9f7bffffU +#define MSK2 0x9fffff5fU +#define MSK3 0x3efffffbU +#define MSK4 0xfffff7bbU +#define PARITY1 0xa8000001U +#define PARITY2 0xaf5390a3U +#define PARITY3 0xb740b3f8U +#define PARITY4 0x6c11486dU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define ALTI_SL2_PERM64 \ - (vector 
unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb" #endif /* SFMT_PARAMS4253_H */ diff --git a/test/include/test/SFMT-params44497.h b/test/include/test/SFMT-params44497.h index 85598fed..8f6fee7b 100644 --- a/test/include/test/SFMT-params44497.h +++ b/test/include/test/SFMT-params44497.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS44497_H #define SFMT_PARAMS44497_H -#define POS1 330 -#define SL1 5 -#define SL2 3 -#define SR1 9 -#define SR2 3 -#define MSK1 0xeffffffbU -#define MSK2 0xdfbebfffU -#define MSK3 0xbfbf7befU -#define MSK4 0x9ffd7bffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xa3ac4000U -#define PARITY4 0xecc1327aU - +#define POS1 330 +#define SL1 5 +#define SL2 3 +#define SR1 9 +#define SR2 3 +#define MSK1 0xeffffffbU +#define MSK2 0xdfbebfffU +#define MSK3 0xbfbf7befU +#define MSK4 0x9ffd7bffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xa3ac4000U +#define PARITY4 0xecc1327aU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff" #endif /* SFMT_PARAMS44497_H */ diff --git a/test/include/test/SFMT-params607.h b/test/include/test/SFMT-params607.h index bc76485f..29fb3913 100644 --- a/test/include/test/SFMT-params607.h +++ b/test/include/test/SFMT-params607.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS607_H #define SFMT_PARAMS607_H -#define POS1 2 -#define SL1 15 -#define SL2 3 -#define SR1 13 -#define SR2 3 -#define MSK1 0xfdff37ffU -#define MSK2 0xef7f3f7dU -#define MSK3 0xff777b7dU -#define MSK4 0x7ff7fb2fU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x5986f054U - +#define POS1 2 +#define SL1 15 +#define SL2 3 +#define SR1 13 +#define SR2 3 +#define MSK1 0xfdff37ffU +#define MSK2 0xef7f3f7dU +#define MSK3 0xff777b7dU +#define MSK4 0x7ff7fb2fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x5986f054U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - (vector 
unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f" #endif /* SFMT_PARAMS607_H */ diff --git a/test/include/test/SFMT-params86243.h b/test/include/test/SFMT-params86243.h index 5e4d783c..5e3747e9 100644 --- a/test/include/test/SFMT-params86243.h +++ b/test/include/test/SFMT-params86243.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS86243_H #define SFMT_PARAMS86243_H -#define POS1 366 -#define SL1 6 -#define SL2 7 -#define SR1 19 -#define SR2 1 -#define MSK1 0xfdbffbffU -#define MSK2 0xbff7ff3fU -#define MSK3 0xfd77efffU -#define MSK4 0xbf9ff3ffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0xe9528d85U - +#define POS1 366 +#define SL1 6 +#define SL2 7 +#define SR1 19 +#define SR2 1 +#define MSK1 0xfdbffbffU +#define MSK2 0xbff7ff3fU +#define MSK3 0xfd77efffU +#define MSK4 0xbf9ff3ffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0xe9528d85U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6) - #define ALTI_SL2_PERM64 
\ - (vector unsigned char)(7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6} - #define ALTI_SL2_PERM64 {7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, \ + 2, 11, 4, 5, 6) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(7, 25, 25, 25, 25, 25, 25, 25, 15, 0, \ + 1, 2, 3, 4, 5, 6) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, 2, 11, 4, 5, 6 } +# define ALTI_SL2_PERM64 \ + { 7, 25, 25, 25, 25, 25, 25, 25, 15, 0, 1, 2, 3, 4, 5, 6 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff" #endif /* SFMT_PARAMS86243_H */ diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h index 169ad558..83b35b43 100644 --- a/test/include/test/SFMT-sse2.h +++ b/test/include/test/SFMT-sse2.h @@ -60,48 +60,49 @@ * @param mask 128-bit mask * @return output */ -JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, - __m128i c, __m128i d, __m128i mask) { - __m128i v, x, y, z; +JEMALLOC_ALWAYS_INLINE __m128i +mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) { + __m128i v, x, y, z; - x = _mm_load_si128(a); - y = _mm_srli_epi32(*b, SR1); - z = _mm_srli_si128(c, SR2); - v = _mm_slli_epi32(d, SL1); - z = _mm_xor_si128(z, x); - z = _mm_xor_si128(z, v); - x = _mm_slli_si128(x, SL2); - y = _mm_and_si128(y, mask); - z = _mm_xor_si128(z, x); - z = _mm_xor_si128(z, y); - return z; + x = _mm_load_si128(a); + y = _mm_srli_epi32(*b, SR1); + z = _mm_srli_si128(c, SR2); + v = _mm_slli_epi32(d, SL1); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, v); + x = _mm_slli_si128(x, SL2); + y = _mm_and_si128(y, mask); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, y); + return z; } /** * This function fills the internal state array with pseudorandom * integers. 
*/ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - __m128i r, r1, r2, mask; - mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); - r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); - r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); - for (i = 0; i < N - POS1; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, - mask); - _mm_store_si128(&ctx->sfmt[i].si, r); - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&ctx->sfmt[i].si, r); - r1 = r2; - r2 = r; - } + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, + r1, r2, mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } } /** @@ -111,47 +112,48 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - __m128i r, r1, r2, mask; - mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); - r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); - r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); - for (i = 0; i < N - POS1; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - /* main loop */ - for (; i < size - N; i++) { - r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - for (j = 0; j < 2 * N - size; j++) { - r = _mm_load_si128(&array[j + size - N].si); - _mm_store_si128(&ctx->sfmt[j].si, r); - } - for (; i < size; i++) { - r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - _mm_store_si128(&ctx->sfmt[j++].si, r); - r1 = r2; - r2 = r; - } + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = mm_recursion( + &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + r = _mm_load_si128(&array[j + size - N].si); + 
_mm_store_si128(&ctx->sfmt[j].si, r); + } + for (; i < size; i++) { + r = mm_recursion( + &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + _mm_store_si128(&ctx->sfmt[j++].si, r); + r1 = r2; + r2 = r; + } } #endif diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 338dd45c..0082c026 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -68,79 +68,89 @@ typedef struct sfmt_s sfmt_t; -uint32_t gen_rand32(sfmt_t *ctx); -uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit); -uint64_t gen_rand64(sfmt_t *ctx); -uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit); -void fill_array32(sfmt_t *ctx, uint32_t *array, int size); -void fill_array64(sfmt_t *ctx, uint64_t *array, int size); -sfmt_t *init_gen_rand(uint32_t seed); -sfmt_t *init_by_array(uint32_t *init_key, int key_length); -void fini_gen_rand(sfmt_t *ctx); +uint32_t gen_rand32(sfmt_t *ctx); +uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit); +uint64_t gen_rand64(sfmt_t *ctx); +uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit); +void fill_array32(sfmt_t *ctx, uint32_t *array, int size); +void fill_array64(sfmt_t *ctx, uint64_t *array, int size); +sfmt_t *init_gen_rand(uint32_t seed); +sfmt_t *init_by_array(uint32_t *init_key, int key_length); +void fini_gen_rand(sfmt_t *ctx); const char *get_idstring(void); -int get_min_array_size32(void); -int get_min_array_size64(void); +int get_min_array_size32(void); +int get_min_array_size64(void); /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ -static inline double to_real1(uint32_t v) { - return v * (1.0/4294967295.0); - /* divided by 2^32-1 */ +static inline double +to_real1(uint32_t v) { + return v * (1.0 / 4294967295.0); + /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ -static inline double genrand_real1(sfmt_t *ctx) { - return to_real1(gen_rand32(ctx)); +static inline double 
+genrand_real1(sfmt_t *ctx) { + return to_real1(gen_rand32(ctx)); } /** generates a random number on [0,1)-real-interval */ -static inline double to_real2(uint32_t v) { - return v * (1.0/4294967296.0); - /* divided by 2^32 */ +static inline double +to_real2(uint32_t v) { + return v * (1.0 / 4294967296.0); + /* divided by 2^32 */ } /** generates a random number on [0,1)-real-interval */ -static inline double genrand_real2(sfmt_t *ctx) { - return to_real2(gen_rand32(ctx)); +static inline double +genrand_real2(sfmt_t *ctx) { + return to_real2(gen_rand32(ctx)); } /** generates a random number on (0,1)-real-interval */ -static inline double to_real3(uint32_t v) { - return (((double)v) + 0.5)*(1.0/4294967296.0); - /* divided by 2^32 */ +static inline double +to_real3(uint32_t v) { + return (((double)v) + 0.5) * (1.0 / 4294967296.0); + /* divided by 2^32 */ } /** generates a random number on (0,1)-real-interval */ -static inline double genrand_real3(sfmt_t *ctx) { - return to_real3(gen_rand32(ctx)); +static inline double +genrand_real3(sfmt_t *ctx) { + return to_real3(gen_rand32(ctx)); } /** These real versions are due to Isaku Wada */ /** generates a random number on [0,1) with 53-bit resolution*/ -static inline double to_res53(uint64_t v) { - return v * (1.0/18446744073709551616.0L); +static inline double +to_res53(uint64_t v) { + return v * (1.0 / 18446744073709551616.0L); } /** generates a random number on [0,1) with 53-bit resolution from two * 32 bit integers */ -static inline double to_res53_mix(uint32_t x, uint32_t y) { - return to_res53(x | ((uint64_t)y << 32)); +static inline double +to_res53_mix(uint32_t x, uint32_t y) { + return to_res53(x | ((uint64_t)y << 32)); } /** generates a random number on [0,1) with 53-bit resolution */ -static inline double genrand_res53(sfmt_t *ctx) { - return to_res53(gen_rand64(ctx)); +static inline double +genrand_res53(sfmt_t *ctx) { + return to_res53(gen_rand64(ctx)); } /** generates a random number on [0,1) with 53-bit 
resolution using 32bit integer. */ -static inline double genrand_res53_mix(sfmt_t *ctx) { - uint32_t x, y; +static inline double +genrand_res53_mix(sfmt_t *ctx) { + uint32_t x, y; - x = gen_rand32(ctx); - y = gen_rand32(ctx); - return to_res53_mix(x, y); + x = gen_rand32(ctx); + y = gen_rand32(ctx); + return to_res53_mix(x, y); } #endif diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 535c1aa1..431fdfae 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -1,25 +1,25 @@ static inline unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), + 0, "Unexpected mallctlbymib() failure"); - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), + 0, "Unexpected mallctlbymib() failure"); return arena_ind; } @@ -33,7 +33,7 @@ 
do_arena_destroy(unsigned arena_ind) { mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -51,7 +51,7 @@ do_epoch(void) { static inline void do_purge(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -62,7 +62,7 @@ do_purge(unsigned arena_ind) { static inline void do_decay(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -73,12 +73,12 @@ do_decay(unsigned arena_ind) { static inline uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; uint64_t npurge = 0; - size_t sz = sizeof(npurge); + size_t sz = sizeof(npurge); expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); return npurge; @@ -105,15 +105,15 @@ get_arena_muzzy_npurge(unsigned arena_ind) { static inline uint64_t get_arena_npurge(unsigned arena_ind) { do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } static inline size_t get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; @@ -128,7 +128,7 @@ static inline size_t get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; @@ -148,8 +148,7 @@ do_mallocx(size_t size, int flags) { static inline void generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; void *p = do_mallocx(size, flags); dallocx(p, flags); } - diff --git a/test/include/test/bench.h b/test/include/test/bench.h index e2a9fc09..faebfd77 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -1,6 +1,6 @@ static inline void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) { +time_func( + timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) { uint64_t i; for (i = 0; i < nwarmup; i++) { @@ -23,16 +23,16 @@ fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { uint64_t nsecs_per_iter1000 = nsec1000 
/ iters; uint64_t intpart = nsecs_per_iter1000 / 1000; uint64_t fracpart = nsecs_per_iter1000 % 1000; - malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, intpart, - fracpart); + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, + intpart, fracpart); } static inline void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, - void (*func_a), const char *name_b, void (*func_b)) { + void(*func_a), const char *name_b, void(*func_b)) { timedelta_t timer_a, timer_b; - char ratio_buf[6]; - void *p; + char ratio_buf[6]; + void *p; p = mallocx(1, 0); if (p == NULL) { @@ -44,16 +44,18 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, time_func(&timer_b, nwarmup, niter, (void (*)(void))func_b); uint64_t usec_a = timer_usec(&timer_a); - char buf_a[FMT_NSECS_BUF_SIZE]; + char buf_a[FMT_NSECS_BUF_SIZE]; fmt_nsecs(usec_a, niter, buf_a); uint64_t usec_b = timer_usec(&timer_b); - char buf_b[FMT_NSECS_BUF_SIZE]; + char buf_b[FMT_NSECS_BUF_SIZE]; fmt_nsecs(usec_b, niter, buf_b); timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 "us (%s ns/iter), " - "%s=%" FMTu64 "us (%s ns/iter), time consumption ratio=%s:1\n", + malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 + "us (%s ns/iter), " + "%s=%" FMTu64 + "us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); @@ -62,10 +64,10 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, static inline void * no_opt_ptr(void *ptr) { #ifdef JEMALLOC_HAVE_ASM_VOLATILE - asm volatile("" : "+r"(ptr)); + asm volatile("" : "+r"(ptr)); #else - void *volatile dup = ptr; - ptr = dup; + void *volatile dup = ptr; + ptr = dup; #endif - return ptr; + return ptr; } diff --git a/test/include/test/bgthd.h b/test/include/test/bgthd.h index 4fa2395e..0a7e789b 100644 --- a/test/include/test/bgthd.h +++ b/test/include/test/bgthd.h @@ -5,9 
+5,9 @@ static inline bool is_background_thread_enabled(void) { - bool enabled; + bool enabled; size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); if (ret == ENOENT) { return false; } diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 8f345993..04a336d5 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -1,30 +1,28 @@ /* btalloc() provides a mechanism for allocating via permuted backtraces. */ -void *btalloc(size_t size, unsigned bits); +void *btalloc(size_t size, unsigned bits); -#define btalloc_n_proto(n) \ -void *btalloc_##n(size_t size, unsigned bits); -btalloc_n_proto(0) -btalloc_n_proto(1) +#define btalloc_n_proto(n) void *btalloc_##n(size_t size, unsigned bits); +btalloc_n_proto(0) btalloc_n_proto(1) -#define btalloc_n_gen(n) \ -void * \ -btalloc_##n(size_t size, unsigned bits) { \ - void *p; \ - \ - if (bits == 0) { \ - p = mallocx(size, 0); \ - } else { \ - switch (bits & 0x1U) { \ - case 0: \ - p = (btalloc_0(size, bits >> 1)); \ - break; \ - case 1: \ - p = (btalloc_1(size, bits >> 1)); \ - break; \ - default: not_reached(); \ - } \ - } \ - /* Intentionally sabotage tail call optimization. */ \ - expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return p; \ -} +#define btalloc_n_gen(n) \ + void *btalloc_##n(size_t size, unsigned bits) { \ + void *p; \ + \ + if (bits == 0) { \ + p = mallocx(size, 0); \ + } else { \ + switch (bits & 0x1U) { \ + case 0: \ + p = (btalloc_0(size, bits >> 1)); \ + break; \ + case 1: \ + p = (btalloc_1(size, bits >> 1)); \ + break; \ + default: \ + not_reached(); \ + } \ + } \ + /* Intentionally sabotage tail call optimization. 
*/ \ + expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return p; \ + } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index aad0a46c..33bb8593 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -3,40 +3,33 @@ * passthrough. */ -static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, +static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t size_a, size_t size_b, bool committed, - unsigned arena_ind); -static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, - size_t size_a, void *addr_b, size_t size_b, bool committed, - unsigned arena_ind); +static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_commit_hook(extent_hooks_t 
*extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t size_a, size_t size_b, bool committed, + unsigned arena_ind); +static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); static extent_hooks_t *default_hooks; -static extent_hooks_t hooks = { - extent_alloc_hook, - extent_dalloc_hook, - extent_destroy_hook, - extent_commit_hook, - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - extent_split_hook, - extent_merge_hook -}; +static extent_hooks_t hooks = {extent_alloc_hook, extent_dalloc_hook, + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, + extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook, + extent_merge_hook}; /* Control whether hook functions pass calls through to default hooks. */ static bool try_alloc = true; @@ -72,9 +65,9 @@ static bool did_split; static bool did_merge; #if 0 -# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) +# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) #else -# define TRACE_HOOK(fmt, ...) +# define TRACE_HOOK(fmt, ...) 
#endif static void * @@ -82,20 +75,21 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; - TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " - "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, - new_addr, size, alignment, *zero ? "true" : "false", *commit ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " + "*zero=%s, *commit=%s, arena_ind=%u)\n", + __func__, extent_hooks, new_addr, size, alignment, + *zero ? "true" : "false", *commit ? "true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->alloc, extent_alloc_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->alloc, extent_alloc_hook, "Wrong hook function"); called_alloc = true; if (!try_alloc) { return NULL; } - ret = default_hooks->alloc(default_hooks, new_addr, size, alignment, - zero, commit, 0); + ret = default_hooks->alloc( + default_hooks, new_addr, size, alignment, zero, commit, 0); did_alloc = (ret != NULL); return ret; } @@ -105,13 +99,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? 
"true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { return true; @@ -124,13 +120,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? "true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->destroy, extent_destroy_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->destroy, extent_destroy_hook, "Wrong hook function"); called_destroy = true; if (!try_destroy) { return; @@ -144,19 +142,20 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->commit, extent_commit_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->commit, extent_commit_hook, "Wrong hook function"); 
called_commit = true; if (!try_commit) { return true; } - err = default_hooks->commit(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->commit( + default_hooks, addr, size, offset, length, 0); did_commit = !err; return err; } @@ -166,9 +165,10 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->decommit, extent_decommit_hook, @@ -177,8 +177,8 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if (!try_decommit) { return true; } - err = default_hooks->decommit(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->decommit( + default_hooks, addr, size, offset, length, 0); did_decommit = !err; return err; } @@ -188,9 +188,10 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, @@ -199,9 +200,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if 
(!try_purge_lazy) { return true; } - err = default_hooks->purge_lazy == NULL || - default_hooks->purge_lazy(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->purge_lazy == NULL + || default_hooks->purge_lazy( + default_hooks, addr, size, offset, length, 0); did_purge_lazy = !err; return err; } @@ -211,9 +212,10 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, @@ -222,9 +224,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if (!try_purge_forced) { return true; } - err = default_hooks->purge_forced == NULL || - default_hooks->purge_forced(default_hooks, addr, size, offset, - length, 0); + err = default_hooks->purge_forced == NULL + || default_hooks->purge_forced( + default_hooks, addr, size, offset, length, 0); did_purge_forced = !err; return err; } @@ -234,21 +236,22 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr, size, size_a, size_b, committed ? "true" : "false", - arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " + "size_b=%zu, committed=%s, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, size_a, size_b, + committed ? 
"true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->split, extent_split_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->split, extent_split_hook, "Wrong hook function"); called_split = true; if (!try_split) { return true; } - err = (default_hooks->split == NULL || - default_hooks->split(default_hooks, addr, size, size_a, size_b, - committed, 0)); + err = (default_hooks->split == NULL + || default_hooks->split( + default_hooks, addr, size, size_a, size_b, committed, 0)); did_split = !err; return err; } @@ -258,23 +261,24 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr_a, size_a, addr_b, size_b, committed ? "true" : "false", - arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " + "size_b=%zu, committed=%s, arena_ind=%u)\n", + __func__, extent_hooks, addr_a, size_a, addr_b, size_b, + committed ? 
"true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->merge, extent_merge_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->merge, extent_merge_hook, "Wrong hook function"); expect_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, "Extents not mergeable"); called_merge = true; if (!try_merge) { return true; } - err = (default_hooks->merge == NULL || - default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b, - committed, 0)); + err = (default_hooks->merge == NULL + || default_hooks->merge( + default_hooks, addr_a, size_a, addr_b, size_b, committed, 0)); did_merge = !err; return err; } @@ -285,5 +289,6 @@ extent_hooks_prep(void) { sz = sizeof(default_hooks); expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, - NULL, 0), 0, "Unexpected mallctl() error"); + NULL, 0), + 0, "Unexpected mallctl() error"); } diff --git a/test/include/test/fork.h b/test/include/test/fork.h index ac9b1858..9e04d279 100644 --- a/test/include/test/fork.h +++ b/test/include/test/fork.h @@ -3,7 +3,7 @@ #ifndef _WIN32 -#include +# include static inline void fork_wait_for_child_exit(int pid) { @@ -13,8 +13,10 @@ fork_wait_for_child_exit(int pid) { test_fail("Unexpected waitpid() failure."); } if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); + test_fail( + "Unexpected child termination due to " + "signal %d", + WTERMSIG(status)); break; } if (WIFEXITED(status)) { diff --git a/test/include/test/math.h b/test/include/test/math.h index efba086d..c9b32e91 100644 --- a/test/include/test/math.h +++ b/test/include/test/math.h @@ -27,9 +27,12 @@ ln_gamma(double x) { z = 1.0 / (x * x); - return f + (x-0.5) * log(x) - x + 0.918938533204673 + - (((-0.000595238095238 * z + 0.000793650793651) * z - - 0.002777777777778) * z + 0.083333333333333) / x; + return f + (x - 0.5) * log(x) - x + 
0.918938533204673 + + (((-0.000595238095238 * z + 0.000793650793651) * z + - 0.002777777777778) + * z + + 0.083333333333333) + / x; } /* @@ -43,8 +46,8 @@ ln_gamma(double x) { */ static inline double i_gamma(double x, double p, double ln_gamma_p) { - double acu, factor, oflo, gin, term, rn, a, b, an, dif; - double pn[6]; + double acu, factor, oflo, gin, term, rn, a, b, an, dif; + double pn[6]; unsigned i; assert(p > 0.0); @@ -91,7 +94,7 @@ i_gamma(double x, double p, double ln_gamma_p) { term += 1.0; an = a * term; for (i = 0; i < 2; i++) { - pn[i+4] = b * pn[i+2] - an * pn[i]; + pn[i + 4] = b * pn[i + 2] - an * pn[i]; } if (pn[5] != 0.0) { rn = pn[4] / pn[5]; @@ -103,7 +106,7 @@ i_gamma(double x, double p, double ln_gamma_p) { gin = rn; } for (i = 0; i < 4; i++) { - pn[i] = pn[i+2]; + pn[i] = pn[i + 2]; } if (fabs(pn[4]) >= oflo) { @@ -135,16 +138,35 @@ pt_norm(double p) { if (fabs(q) <= 0.425) { /* p close to 1/2. */ r = 0.180625 - q * q; - return q * (((((((2.5090809287301226727e3 * r + - 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r - + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * - r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) - * r + 3.3871328727963666080e0) / - (((((((5.2264952788528545610e3 * r + - 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r - + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * - r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) - * r + 1.0); + return q + * (((((((2.5090809287301226727e3 * r + + 3.3430575583588128105e4) + * r + + 6.7265770927008700853e4) + * r + + 4.5921953931549871457e4) + * r + + 1.3731693765509461125e4) + * r + + 1.9715909503065514427e3) + * r + + 1.3314166789178437745e2) + * r + + 3.3871328727963666080e0) + / (((((((5.2264952788528545610e3 * r + + 2.8729085735721942674e4) + * r + + 3.9307895800092710610e4) + * r + + 2.1213794301586595867e4) + * r + + 5.3941960214247511077e3) + * r + + 6.8718700749205790830e2) + * r + + 4.2313330701600911252e1) + 
* r + + 1.0); } else { if (q < 0.0) { r = p; @@ -157,40 +179,65 @@ pt_norm(double p) { if (r <= 5.0) { /* p neither close to 1/2 nor 0 or 1. */ r -= 1.6; - ret = ((((((((7.74545014278341407640e-4 * r + - 2.27238449892691845833e-2) * r + - 2.41780725177450611770e-1) * r + - 1.27045825245236838258e0) * r + - 3.64784832476320460504e0) * r + - 5.76949722146069140550e0) * r + - 4.63033784615654529590e0) * r + - 1.42343711074968357734e0) / - (((((((1.05075007164441684324e-9 * r + - 5.47593808499534494600e-4) * r + - 1.51986665636164571966e-2) - * r + 1.48103976427480074590e-1) * r + - 6.89767334985100004550e-1) * r + - 1.67638483018380384940e0) * r + - 2.05319162663775882187e0) * r + 1.0)); + ret = ((((((((7.74545014278341407640e-4 * r + + 2.27238449892691845833e-2) + * r + + 2.41780725177450611770e-1) + * r + + 1.27045825245236838258e0) + * r + + 3.64784832476320460504e0) + * r + + 5.76949722146069140550e0) + * r + + 4.63033784615654529590e0) + * r + + 1.42343711074968357734e0) + / (((((((1.05075007164441684324e-9 * r + + 5.47593808499534494600e-4) + * r + + 1.51986665636164571966e-2) + * r + + 1.48103976427480074590e-1) + * r + + 6.89767334985100004550e-1) + * r + + 1.67638483018380384940e0) + * r + + 2.05319162663775882187e0) + * r + + 1.0)); } else { /* p near 0 or 1. 
*/ r -= 5.0; - ret = ((((((((2.01033439929228813265e-7 * r + - 2.71155556874348757815e-5) * r + - 1.24266094738807843860e-3) * r + - 2.65321895265761230930e-2) * r + - 2.96560571828504891230e-1) * r + - 1.78482653991729133580e0) * r + - 5.46378491116411436990e0) * r + - 6.65790464350110377720e0) / - (((((((2.04426310338993978564e-15 * r + - 1.42151175831644588870e-7) * r + - 1.84631831751005468180e-5) * r + - 7.86869131145613259100e-4) * r + - 1.48753612908506148525e-2) * r + - 1.36929880922735805310e-1) * r + - 5.99832206555887937690e-1) - * r + 1.0)); + ret = ((((((((2.01033439929228813265e-7 * r + + 2.71155556874348757815e-5) + * r + + 1.24266094738807843860e-3) + * r + + 2.65321895265761230930e-2) + * r + + 2.96560571828504891230e-1) + * r + + 1.78482653991729133580e0) + * r + + 5.46378491116411436990e0) + * r + + 6.65790464350110377720e0) + / (((((((2.04426310338993978564e-15 * r + + 1.42151175831644588870e-7) + * r + + 1.84631831751005468180e-5) + * r + + 7.86869131145613259100e-4) + * r + + 1.48753612908506148525e-2) + * r + + 1.36929880922735805310e-1) + * r + + 5.99832206555887937690e-1) + * r + + 1.0)); } if (q < 0.0) { ret = -ret; @@ -244,8 +291,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { ch = df * pow(x * sqrt(p1) + 1.0 - p1, 3.0); /* Starting approximation for p tending to 1. 
*/ if (ch > 2.2 * df + 6.0) { - ch = -2.0 * (log(1.0 - p) - c * log(0.5 * ch) + - ln_gamma_df_2); + ch = -2.0 + * (log(1.0 - p) - c * log(0.5 * ch) + + ln_gamma_df_2); } } else { ch = 0.4; @@ -254,10 +302,13 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { q = ch; p1 = 1.0 + ch * (4.67 + ch); p2 = ch * (6.73 + ch * (6.66 + ch)); - t = -0.5 + (4.67 + 2.0 * ch) / p1 - (6.73 + ch - * (13.32 + 3.0 * ch)) / p2; - ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch + - c * aa) * p2 / p1) / t; + t = -0.5 + (4.67 + 2.0 * ch) / p1 + - (6.73 + ch * (13.32 + 3.0 * ch)) / p2; + ch -= (1.0 + - exp(a + ln_gamma_df_2 + 0.5 * ch + + c * aa) + * p2 / p1) + / t; if (fabs(q / ch - 1.0) - 0.01 <= 0.0) { break; } @@ -276,17 +327,36 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch)); b = t / ch; a = 0.5 * t - b * c; - s1 = (210.0 + a * (140.0 + a * (105.0 + a * (84.0 + a * (70.0 + - 60.0 * a))))) / 420.0; - s2 = (420.0 + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * - a)))) / 2520.0; + s1 = (210.0 + + a + * (140.0 + + a + * (105.0 + + a * (84.0 + a * (70.0 + 60.0 * a))))) + / 420.0; + s2 = + (420.0 + + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * a)))) + / 2520.0; s3 = (210.0 + a * (462.0 + a * (707.0 + 932.0 * a))) / 2520.0; - s4 = (252.0 + a * (672.0 + 1182.0 * a) + c * (294.0 + a * - (889.0 + 1740.0 * a))) / 5040.0; + s4 = (252.0 + a * (672.0 + 1182.0 * a) + + c * (294.0 + a * (889.0 + 1740.0 * a))) + / 5040.0; s5 = (84.0 + 264.0 * a + c * (175.0 + 606.0 * a)) / 2520.0; s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0; - ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - - b * (s4 - b * (s5 - b * s6)))))); + ch += t + * (1.0 + 0.5 * t * s1 + - b * c + * (s1 + - b + * (s2 + - b + * (s3 + - b + * (s4 + - b + * (s5 + - b * s6)))))); if (fabs(q / ch - 1.0) <= e) { break; } diff --git a/test/include/test/mq.h b/test/include/test/mq.h index 5dc6486c..4a68d709 100644 --- a/test/include/test/mq.h +++ 
b/test/include/test/mq.h @@ -26,82 +26,74 @@ * does not perform any cleanup of messages, since it knows nothing of their * payloads. */ -#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) +#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) -#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ -typedef struct { \ - mtx_t lock; \ - ql_head(a_mq_msg_type) msgs; \ - unsigned count; \ -} a_mq_type; \ -a_attr bool \ -a_prefix##init(a_mq_type *mq) { \ - \ - if (mtx_init(&mq->lock)) { \ - return true; \ - } \ - ql_new(&mq->msgs); \ - mq->count = 0; \ - return false; \ -} \ -a_attr void \ -a_prefix##fini(a_mq_type *mq) { \ - mtx_fini(&mq->lock); \ -} \ -a_attr unsigned \ -a_prefix##count(a_mq_type *mq) { \ - unsigned count; \ - \ - mtx_lock(&mq->lock); \ - count = mq->count; \ - mtx_unlock(&mq->lock); \ - return count; \ -} \ -a_attr a_mq_msg_type * \ -a_prefix##tryget(a_mq_type *mq) { \ - a_mq_msg_type *msg; \ - \ - mtx_lock(&mq->lock); \ - msg = ql_first(&mq->msgs); \ - if (msg != NULL) { \ - ql_head_remove(&mq->msgs, a_mq_msg_type, a_field); \ - mq->count--; \ - } \ - mtx_unlock(&mq->lock); \ - return msg; \ -} \ -a_attr a_mq_msg_type * \ -a_prefix##get(a_mq_type *mq) { \ - a_mq_msg_type *msg; \ - unsigned ns; \ - \ - msg = a_prefix##tryget(mq); \ - if (msg != NULL) { \ - return msg; \ - } \ - \ - ns = 1; \ - while (true) { \ - sleep_ns(ns); \ - msg = a_prefix##tryget(mq); \ - if (msg != NULL) { \ - return msg; \ - } \ - if (ns < 1000*1000*1000) { \ - /* Double sleep time, up to max 1 second. 
*/ \ - ns <<= 1; \ - if (ns > 1000*1000*1000) { \ - ns = 1000*1000*1000; \ - } \ - } \ - } \ -} \ -a_attr void \ -a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \ - \ - mtx_lock(&mq->lock); \ - ql_elm_new(msg, a_field); \ - ql_tail_insert(&mq->msgs, msg, a_field); \ - mq->count++; \ - mtx_unlock(&mq->lock); \ -} +#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ + typedef struct { \ + mtx_t lock; \ + ql_head(a_mq_msg_type) msgs; \ + unsigned count; \ + } a_mq_type; \ + a_attr bool a_prefix##init(a_mq_type *mq) { \ + if (mtx_init(&mq->lock)) { \ + return true; \ + } \ + ql_new(&mq->msgs); \ + mq->count = 0; \ + return false; \ + } \ + a_attr void a_prefix##fini(a_mq_type *mq) { \ + mtx_fini(&mq->lock); \ + } \ + a_attr unsigned a_prefix##count(a_mq_type *mq) { \ + unsigned count; \ + \ + mtx_lock(&mq->lock); \ + count = mq->count; \ + mtx_unlock(&mq->lock); \ + return count; \ + } \ + a_attr a_mq_msg_type *a_prefix##tryget(a_mq_type *mq) { \ + a_mq_msg_type *msg; \ + \ + mtx_lock(&mq->lock); \ + msg = ql_first(&mq->msgs); \ + if (msg != NULL) { \ + ql_head_remove(&mq->msgs, a_mq_msg_type, a_field); \ + mq->count--; \ + } \ + mtx_unlock(&mq->lock); \ + return msg; \ + } \ + a_attr a_mq_msg_type *a_prefix##get(a_mq_type *mq) { \ + a_mq_msg_type *msg; \ + unsigned ns; \ + \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) { \ + return msg; \ + } \ + \ + ns = 1; \ + while (true) { \ + sleep_ns(ns); \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) { \ + return msg; \ + } \ + if (ns < 1000 * 1000 * 1000) { \ + /* Double sleep time, up to max 1 second. 
*/ \ + ns <<= 1; \ + if (ns > 1000 * 1000 * 1000) { \ + ns = 1000 * 1000 * 1000; \ + } \ + } \ + } \ + } \ + a_attr void a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \ + mtx_lock(&mq->lock); \ + ql_elm_new(msg, a_field); \ + ql_tail_insert(&mq->msgs, msg, a_field); \ + mq->count++; \ + mtx_unlock(&mq->lock); \ + } diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index 066a2137..c771ca3a 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -7,15 +7,15 @@ typedef struct { #ifdef _WIN32 - CRITICAL_SECTION lock; + CRITICAL_SECTION lock; #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; + os_unfair_lock lock; #else - pthread_mutex_t lock; + pthread_mutex_t lock; #endif } mtx_t; -bool mtx_init(mtx_t *mtx); -void mtx_fini(mtx_t *mtx); -void mtx_lock(mtx_t *mtx); -void mtx_unlock(mtx_t *mtx); +bool mtx_init(mtx_t *mtx); +void mtx_fini(mtx_t *mtx); +void mtx_lock(mtx_t *mtx); +void mtx_unlock(mtx_t *mtx); diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h index c06cf1b4..2c30a61c 100644 --- a/test/include/test/nbits.h +++ b/test/include/test/nbits.h @@ -3,109 +3,109 @@ /* Interesting bitmap counts to test. 
*/ -#define NBITS_TAB \ - NB( 1) \ - NB( 2) \ - NB( 3) \ - NB( 4) \ - NB( 5) \ - NB( 6) \ - NB( 7) \ - NB( 8) \ - NB( 9) \ - NB(10) \ - NB(11) \ - NB(12) \ - NB(13) \ - NB(14) \ - NB(15) \ - NB(16) \ - NB(17) \ - NB(18) \ - NB(19) \ - NB(20) \ - NB(21) \ - NB(22) \ - NB(23) \ - NB(24) \ - NB(25) \ - NB(26) \ - NB(27) \ - NB(28) \ - NB(29) \ - NB(30) \ - NB(31) \ - NB(32) \ - \ - NB(33) \ - NB(34) \ - NB(35) \ - NB(36) \ - NB(37) \ - NB(38) \ - NB(39) \ - NB(40) \ - NB(41) \ - NB(42) \ - NB(43) \ - NB(44) \ - NB(45) \ - NB(46) \ - NB(47) \ - NB(48) \ - NB(49) \ - NB(50) \ - NB(51) \ - NB(52) \ - NB(53) \ - NB(54) \ - NB(55) \ - NB(56) \ - NB(57) \ - NB(58) \ - NB(59) \ - NB(60) \ - NB(61) \ - NB(62) \ - NB(63) \ - NB(64) \ - NB(65) \ - NB(66) \ - NB(67) \ - \ - NB(126) \ - NB(127) \ - NB(128) \ - NB(129) \ - NB(130) \ - \ - NB(254) \ - NB(255) \ - NB(256) \ - NB(257) \ - NB(258) \ - \ - NB(510) \ - NB(511) \ - NB(512) \ - NB(513) \ - NB(514) \ - \ - NB(1022) \ - NB(1023) \ - NB(1024) \ - NB(1025) \ - NB(1026) \ - \ - NB(2048) \ - \ - NB(4094) \ - NB(4095) \ - NB(4096) \ - NB(4097) \ - NB(4098) \ - \ - NB(8192) \ - NB(16384) +#define NBITS_TAB \ + NB(1) \ + NB(2) \ + NB(3) \ + NB(4) \ + NB(5) \ + NB(6) \ + NB(7) \ + NB(8) \ + NB(9) \ + NB(10) \ + NB(11) \ + NB(12) \ + NB(13) \ + NB(14) \ + NB(15) \ + NB(16) \ + NB(17) \ + NB(18) \ + NB(19) \ + NB(20) \ + NB(21) \ + NB(22) \ + NB(23) \ + NB(24) \ + NB(25) \ + NB(26) \ + NB(27) \ + NB(28) \ + NB(29) \ + NB(30) \ + NB(31) \ + NB(32) \ + \ + NB(33) \ + NB(34) \ + NB(35) \ + NB(36) \ + NB(37) \ + NB(38) \ + NB(39) \ + NB(40) \ + NB(41) \ + NB(42) \ + NB(43) \ + NB(44) \ + NB(45) \ + NB(46) \ + NB(47) \ + NB(48) \ + NB(49) \ + NB(50) \ + NB(51) \ + NB(52) \ + NB(53) \ + NB(54) \ + NB(55) \ + NB(56) \ + NB(57) \ + NB(58) \ + NB(59) \ + NB(60) \ + NB(61) \ + NB(62) \ + NB(63) \ + NB(64) \ + NB(65) \ + NB(66) \ + NB(67) \ + \ + NB(126) \ + NB(127) \ + NB(128) \ + NB(129) \ + NB(130) \ + \ + NB(254) \ + NB(255) \ + NB(256) \ + 
NB(257) \ + NB(258) \ + \ + NB(510) \ + NB(511) \ + NB(512) \ + NB(513) \ + NB(514) \ + \ + NB(1022) \ + NB(1023) \ + NB(1024) \ + NB(1025) \ + NB(1026) \ + \ + NB(2048) \ + \ + NB(4094) \ + NB(4095) \ + NB(4096) \ + NB(4097) \ + NB(4098) \ + \ + NB(8192) \ + NB(16384) #endif /* TEST_NBITS_H */ diff --git a/test/include/test/san.h b/test/include/test/san.h index da07865c..65a235e9 100644 --- a/test/include/test/san.h +++ b/test/include/test/san.h @@ -1,9 +1,9 @@ #if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) -# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" -# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" +# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" +# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" #else -# define TEST_SAN_UAF_ALIGN_ENABLE "" -# define TEST_SAN_UAF_ALIGN_DISABLE "" +# define TEST_SAN_UAF_ALIGN_ENABLE "" +# define TEST_SAN_UAF_ALIGN_DISABLE "" #endif static inline bool @@ -11,4 +11,3 @@ extent_is_guarded(tsdn_t *tsdn, void *ptr) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); return edata_guarded_get(edata); } - diff --git a/test/include/test/test.h b/test/include/test/test.h index 80ca7cbb..025c167d 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,502 +1,503 @@ -#define ASSERT_BUFSIZE 256 +#define ASSERT_BUFSIZE 256 -#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) do { \ - const t a_ = (a); \ - const t b_ = (b); \ - if (!(a_ cmp b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) " #cmp " (%s) --> " \ - "%" pri " " #neg_cmp " %" pri ": ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_, b_); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) 
\ + do { \ + const t a_ = (a); \ + const t b_ = (b); \ + if (!(a_ cmp b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) " #cmp \ + " (%s) --> " \ + "%" pri " " #neg_cmp " %" pri ": ", \ + __func__, __FILE__, __LINE__, #a, #b, a_, b_); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(false, \ - t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) \ + verify_cmp(false, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) -#define expect_ptr_eq(a, b, ...) expect_cmp(void *, a, b, ==, \ - !=, "p", __VA_ARGS__) -#define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ - ==, "p", __VA_ARGS__) -#define expect_ptr_null(a, ...) expect_cmp(void *, a, NULL, ==, \ - !=, "p", __VA_ARGS__) -#define expect_ptr_not_null(a, ...) expect_cmp(void *, a, NULL, !=, \ - ==, "p", __VA_ARGS__) +#define expect_ptr_eq(a, b, ...) \ + expect_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__) +#define expect_ptr_ne(a, b, ...) \ + expect_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__) +#define expect_ptr_null(a, ...) \ + expect_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__) +#define expect_ptr_not_null(a, ...) \ + expect_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__) -#define expect_c_eq(a, b, ...) expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define expect_c_eq(a, b, ...) 
expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define expect_d_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define expect_d_eq(a, b, ...) 
expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define expect_ld_eq(a, b, ...) expect_cmp(long, a, b, ==, \ - !=, "ld", __VA_ARGS__) -#define expect_ld_ne(a, b, ...) expect_cmp(long, a, b, !=, \ - ==, "ld", __VA_ARGS__) -#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, \ - >=, "ld", __VA_ARGS__) -#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, \ - >, "ld", __VA_ARGS__) -#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, \ - <, "ld", __VA_ARGS__) -#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, \ - <=, "ld", __VA_ARGS__) +#define expect_ld_eq(a, b, ...) 
\ + expect_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__) +#define expect_ld_ne(a, b, ...) \ + expect_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__) +#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, >=, "ld", __VA_ARGS__) +#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, >, "ld", __VA_ARGS__) +#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, <, "ld", __VA_ARGS__) +#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, <=, "ld", __VA_ARGS__) -#define expect_lu_eq(a, b, ...) expect_cmp(unsigned long, \ - a, b, ==, !=, "lu", __VA_ARGS__) -#define expect_lu_ne(a, b, ...) expect_cmp(unsigned long, \ - a, b, !=, ==, "lu", __VA_ARGS__) -#define expect_lu_lt(a, b, ...) expect_cmp(unsigned long, \ - a, b, <, >=, "lu", __VA_ARGS__) -#define expect_lu_le(a, b, ...) expect_cmp(unsigned long, \ - a, b, <=, >, "lu", __VA_ARGS__) -#define expect_lu_ge(a, b, ...) expect_cmp(unsigned long, \ - a, b, >=, <, "lu", __VA_ARGS__) -#define expect_lu_gt(a, b, ...) expect_cmp(unsigned long, \ - a, b, >, <=, "lu", __VA_ARGS__) +#define expect_lu_eq(a, b, ...) \ + expect_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__) +#define expect_lu_ne(a, b, ...) \ + expect_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__) +#define expect_lu_lt(a, b, ...) \ + expect_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__) +#define expect_lu_le(a, b, ...) \ + expect_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__) +#define expect_lu_ge(a, b, ...) \ + expect_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__) +#define expect_lu_gt(a, b, ...) \ + expect_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__) -#define expect_qd_eq(a, b, ...) expect_cmp(long long, a, b, ==, \ - !=, "qd", __VA_ARGS__) -#define expect_qd_ne(a, b, ...) expect_cmp(long long, a, b, !=, \ - ==, "qd", __VA_ARGS__) -#define expect_qd_lt(a, b, ...) expect_cmp(long long, a, b, <, \ - >=, "qd", __VA_ARGS__) -#define expect_qd_le(a, b, ...) 
expect_cmp(long long, a, b, <=, \ - >, "qd", __VA_ARGS__) -#define expect_qd_ge(a, b, ...) expect_cmp(long long, a, b, >=, \ - <, "qd", __VA_ARGS__) -#define expect_qd_gt(a, b, ...) expect_cmp(long long, a, b, >, \ - <=, "qd", __VA_ARGS__) +#define expect_qd_eq(a, b, ...) \ + expect_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__) +#define expect_qd_ne(a, b, ...) \ + expect_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__) +#define expect_qd_lt(a, b, ...) \ + expect_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__) +#define expect_qd_le(a, b, ...) \ + expect_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__) +#define expect_qd_ge(a, b, ...) \ + expect_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__) +#define expect_qd_gt(a, b, ...) \ + expect_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__) -#define expect_qu_eq(a, b, ...) expect_cmp(unsigned long long, \ - a, b, ==, !=, "qu", __VA_ARGS__) -#define expect_qu_ne(a, b, ...) expect_cmp(unsigned long long, \ - a, b, !=, ==, "qu", __VA_ARGS__) -#define expect_qu_lt(a, b, ...) expect_cmp(unsigned long long, \ - a, b, <, >=, "qu", __VA_ARGS__) -#define expect_qu_le(a, b, ...) expect_cmp(unsigned long long, \ - a, b, <=, >, "qu", __VA_ARGS__) -#define expect_qu_ge(a, b, ...) expect_cmp(unsigned long long, \ - a, b, >=, <, "qu", __VA_ARGS__) -#define expect_qu_gt(a, b, ...) expect_cmp(unsigned long long, \ - a, b, >, <=, "qu", __VA_ARGS__) +#define expect_qu_eq(a, b, ...) \ + expect_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__) +#define expect_qu_ne(a, b, ...) \ + expect_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__) +#define expect_qu_lt(a, b, ...) \ + expect_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__) +#define expect_qu_le(a, b, ...) \ + expect_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__) +#define expect_qu_ge(a, b, ...) \ + expect_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__) +#define expect_qu_gt(a, b, ...) 
\ + expect_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__) -#define expect_jd_eq(a, b, ...) expect_cmp(intmax_t, a, b, ==, \ - !=, "jd", __VA_ARGS__) -#define expect_jd_ne(a, b, ...) expect_cmp(intmax_t, a, b, !=, \ - ==, "jd", __VA_ARGS__) -#define expect_jd_lt(a, b, ...) expect_cmp(intmax_t, a, b, <, \ - >=, "jd", __VA_ARGS__) -#define expect_jd_le(a, b, ...) expect_cmp(intmax_t, a, b, <=, \ - >, "jd", __VA_ARGS__) -#define expect_jd_ge(a, b, ...) expect_cmp(intmax_t, a, b, >=, \ - <, "jd", __VA_ARGS__) -#define expect_jd_gt(a, b, ...) expect_cmp(intmax_t, a, b, >, \ - <=, "jd", __VA_ARGS__) +#define expect_jd_eq(a, b, ...) \ + expect_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__) +#define expect_jd_ne(a, b, ...) \ + expect_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__) +#define expect_jd_lt(a, b, ...) \ + expect_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__) +#define expect_jd_le(a, b, ...) \ + expect_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__) +#define expect_jd_ge(a, b, ...) \ + expect_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__) +#define expect_jd_gt(a, b, ...) \ + expect_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__) -#define expect_ju_eq(a, b, ...) expect_cmp(uintmax_t, a, b, ==, \ - !=, "ju", __VA_ARGS__) -#define expect_ju_ne(a, b, ...) expect_cmp(uintmax_t, a, b, !=, \ - ==, "ju", __VA_ARGS__) -#define expect_ju_lt(a, b, ...) expect_cmp(uintmax_t, a, b, <, \ - >=, "ju", __VA_ARGS__) -#define expect_ju_le(a, b, ...) expect_cmp(uintmax_t, a, b, <=, \ - >, "ju", __VA_ARGS__) -#define expect_ju_ge(a, b, ...) expect_cmp(uintmax_t, a, b, >=, \ - <, "ju", __VA_ARGS__) -#define expect_ju_gt(a, b, ...) expect_cmp(uintmax_t, a, b, >, \ - <=, "ju", __VA_ARGS__) +#define expect_ju_eq(a, b, ...) \ + expect_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__) +#define expect_ju_ne(a, b, ...) \ + expect_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__) +#define expect_ju_lt(a, b, ...) 
\ + expect_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__) +#define expect_ju_le(a, b, ...) \ + expect_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__) +#define expect_ju_ge(a, b, ...) \ + expect_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__) +#define expect_ju_gt(a, b, ...) \ + expect_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__) -#define expect_zd_eq(a, b, ...) expect_cmp(ssize_t, a, b, ==, \ - !=, "zd", __VA_ARGS__) -#define expect_zd_ne(a, b, ...) expect_cmp(ssize_t, a, b, !=, \ - ==, "zd", __VA_ARGS__) -#define expect_zd_lt(a, b, ...) expect_cmp(ssize_t, a, b, <, \ - >=, "zd", __VA_ARGS__) -#define expect_zd_le(a, b, ...) expect_cmp(ssize_t, a, b, <=, \ - >, "zd", __VA_ARGS__) -#define expect_zd_ge(a, b, ...) expect_cmp(ssize_t, a, b, >=, \ - <, "zd", __VA_ARGS__) -#define expect_zd_gt(a, b, ...) expect_cmp(ssize_t, a, b, >, \ - <=, "zd", __VA_ARGS__) +#define expect_zd_eq(a, b, ...) \ + expect_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__) +#define expect_zd_ne(a, b, ...) \ + expect_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__) +#define expect_zd_lt(a, b, ...) \ + expect_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__) +#define expect_zd_le(a, b, ...) \ + expect_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__) +#define expect_zd_ge(a, b, ...) \ + expect_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__) +#define expect_zd_gt(a, b, ...) \ + expect_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__) -#define expect_zu_eq(a, b, ...) expect_cmp(size_t, a, b, ==, \ - !=, "zu", __VA_ARGS__) -#define expect_zu_ne(a, b, ...) expect_cmp(size_t, a, b, !=, \ - ==, "zu", __VA_ARGS__) -#define expect_zu_lt(a, b, ...) expect_cmp(size_t, a, b, <, \ - >=, "zu", __VA_ARGS__) -#define expect_zu_le(a, b, ...) expect_cmp(size_t, a, b, <=, \ - >, "zu", __VA_ARGS__) -#define expect_zu_ge(a, b, ...) expect_cmp(size_t, a, b, >=, \ - <, "zu", __VA_ARGS__) -#define expect_zu_gt(a, b, ...) expect_cmp(size_t, a, b, >, \ - <=, "zu", __VA_ARGS__) +#define expect_zu_eq(a, b, ...) 
\ + expect_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__) +#define expect_zu_ne(a, b, ...) \ + expect_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__) +#define expect_zu_lt(a, b, ...) \ + expect_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__) +#define expect_zu_le(a, b, ...) \ + expect_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__) +#define expect_zu_ge(a, b, ...) \ + expect_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__) +#define expect_zu_gt(a, b, ...) \ + expect_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__) -#define expect_d32_eq(a, b, ...) expect_cmp(int32_t, a, b, ==, \ - !=, FMTd32, __VA_ARGS__) -#define expect_d32_ne(a, b, ...) expect_cmp(int32_t, a, b, !=, \ - ==, FMTd32, __VA_ARGS__) -#define expect_d32_lt(a, b, ...) expect_cmp(int32_t, a, b, <, \ - >=, FMTd32, __VA_ARGS__) -#define expect_d32_le(a, b, ...) expect_cmp(int32_t, a, b, <=, \ - >, FMTd32, __VA_ARGS__) -#define expect_d32_ge(a, b, ...) expect_cmp(int32_t, a, b, >=, \ - <, FMTd32, __VA_ARGS__) -#define expect_d32_gt(a, b, ...) expect_cmp(int32_t, a, b, >, \ - <=, FMTd32, __VA_ARGS__) +#define expect_d32_eq(a, b, ...) \ + expect_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__) +#define expect_d32_ne(a, b, ...) \ + expect_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__) +#define expect_d32_lt(a, b, ...) \ + expect_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__) +#define expect_d32_le(a, b, ...) \ + expect_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__) +#define expect_d32_ge(a, b, ...) \ + expect_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__) +#define expect_d32_gt(a, b, ...) \ + expect_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__) -#define expect_u32_eq(a, b, ...) expect_cmp(uint32_t, a, b, ==, \ - !=, FMTu32, __VA_ARGS__) -#define expect_u32_ne(a, b, ...) expect_cmp(uint32_t, a, b, !=, \ - ==, FMTu32, __VA_ARGS__) -#define expect_u32_lt(a, b, ...) expect_cmp(uint32_t, a, b, <, \ - >=, FMTu32, __VA_ARGS__) -#define expect_u32_le(a, b, ...) 
expect_cmp(uint32_t, a, b, <=, \ - >, FMTu32, __VA_ARGS__) -#define expect_u32_ge(a, b, ...) expect_cmp(uint32_t, a, b, >=, \ - <, FMTu32, __VA_ARGS__) -#define expect_u32_gt(a, b, ...) expect_cmp(uint32_t, a, b, >, \ - <=, FMTu32, __VA_ARGS__) +#define expect_u32_eq(a, b, ...) \ + expect_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__) +#define expect_u32_ne(a, b, ...) \ + expect_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__) +#define expect_u32_lt(a, b, ...) \ + expect_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__) +#define expect_u32_le(a, b, ...) \ + expect_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__) +#define expect_u32_ge(a, b, ...) \ + expect_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__) +#define expect_u32_gt(a, b, ...) \ + expect_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__) -#define expect_d64_eq(a, b, ...) expect_cmp(int64_t, a, b, ==, \ - !=, FMTd64, __VA_ARGS__) -#define expect_d64_ne(a, b, ...) expect_cmp(int64_t, a, b, !=, \ - ==, FMTd64, __VA_ARGS__) -#define expect_d64_lt(a, b, ...) expect_cmp(int64_t, a, b, <, \ - >=, FMTd64, __VA_ARGS__) -#define expect_d64_le(a, b, ...) expect_cmp(int64_t, a, b, <=, \ - >, FMTd64, __VA_ARGS__) -#define expect_d64_ge(a, b, ...) expect_cmp(int64_t, a, b, >=, \ - <, FMTd64, __VA_ARGS__) -#define expect_d64_gt(a, b, ...) expect_cmp(int64_t, a, b, >, \ - <=, FMTd64, __VA_ARGS__) +#define expect_d64_eq(a, b, ...) \ + expect_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__) +#define expect_d64_ne(a, b, ...) \ + expect_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__) +#define expect_d64_lt(a, b, ...) \ + expect_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__) +#define expect_d64_le(a, b, ...) \ + expect_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__) +#define expect_d64_ge(a, b, ...) \ + expect_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__) +#define expect_d64_gt(a, b, ...) \ + expect_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__) -#define expect_u64_eq(a, b, ...) 
expect_cmp(uint64_t, a, b, ==, \ - !=, FMTu64, __VA_ARGS__) -#define expect_u64_ne(a, b, ...) expect_cmp(uint64_t, a, b, !=, \ - ==, FMTu64, __VA_ARGS__) -#define expect_u64_lt(a, b, ...) expect_cmp(uint64_t, a, b, <, \ - >=, FMTu64, __VA_ARGS__) -#define expect_u64_le(a, b, ...) expect_cmp(uint64_t, a, b, <=, \ - >, FMTu64, __VA_ARGS__) -#define expect_u64_ge(a, b, ...) expect_cmp(uint64_t, a, b, >=, \ - <, FMTu64, __VA_ARGS__) -#define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ - <=, FMTu64, __VA_ARGS__) +#define expect_u64_eq(a, b, ...) \ + expect_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__) +#define expect_u64_ne(a, b, ...) \ + expect_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__) +#define expect_u64_lt(a, b, ...) \ + expect_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__) +#define expect_u64_le(a, b, ...) \ + expect_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__) +#define expect_u64_ge(a, b, ...) \ + expect_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__) +#define expect_u64_gt(a, b, ...) \ + expect_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__) -#define verify_b_eq(may_abort, a, b, ...) do { \ - bool a_ = (a); \ - bool b_ = (b); \ - if (!(a_ == b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) == (%s) --> %s != %s: ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false"); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_b_eq(may_abort, a, b, ...) \ + do { \ + bool a_ = (a); \ + bool b_ = (b); \ + if (!(a_ == b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) == (%s) --> %s != %s: ", \ + __func__, __FILE__, __LINE__, #a, #b, \ + a_ ? "true" : "false", b_ ? 
"true" : "false"); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define verify_b_ne(may_abort, a, b, ...) do { \ - bool a_ = (a); \ - bool b_ = (b); \ - if (!(a_ != b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) != (%s) --> %s == %s: ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false"); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_b_ne(may_abort, a, b, ...) \ + do { \ + bool a_ = (a); \ + bool b_ = (b); \ + if (!(a_ != b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) != (%s) --> %s == %s: ", \ + __func__, __FILE__, __LINE__, #a, #b, \ + a_ ? "true" : "false", b_ ? "true" : "false"); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) -#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) +#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) +#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) -#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) -#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) +#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) +#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define verify_str_eq(may_abort, a, b, ...) 
do { \ - if (strcmp((a), (b)) != 0) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) same as (%s) --> " \ - "\"%s\" differs from \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_str_eq(may_abort, a, b, ...) \ + do { \ + if (strcmp((a), (b)) != 0) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) same as (%s) --> " \ + "\"%s\" differs from \"%s\": ", \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define verify_str_ne(may_abort, a, b, ...) do { \ - if (strcmp((a), (b)) == 0) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) differs from (%s) --> " \ - "\"%s\" same as \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_str_ne(may_abort, a, b, ...) \ + do { \ + if (strcmp((a), (b)) == 0) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) differs from (%s) --> " \ + "\"%s\" same as \"%s\": ", \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) #define expect_str_eq(a, b, ...) verify_str_eq(false, a, b, __VA_ARGS__) #define expect_str_ne(a, b, ...) 
verify_str_ne(false, a, b, __VA_ARGS__) -#define verify_not_reached(may_abort, ...) do { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Unreachable code reached: ", \ - __func__, __FILE__, __LINE__); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ -} while (0) +#define verify_not_reached(may_abort, ...) \ + do { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Unreachable code reached: ", __func__, \ + __FILE__, __LINE__); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } while (0) #define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(true, \ - t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) \ + verify_cmp(true, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) -#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ - !=, "p", __VA_ARGS__) -#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ - ==, "p", __VA_ARGS__) -#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ - !=, "p", __VA_ARGS__) -#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ - ==, "p", __VA_ARGS__) +#define assert_ptr_eq(a, b, ...) \ + assert_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__) +#define assert_ptr_ne(a, b, ...) \ + assert_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__) +#define assert_ptr_null(a, ...) \ + assert_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__) +#define assert_ptr_not_null(a, ...) \ + assert_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define assert_c_lt(a, b, ...) 
assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define assert_d_lt(a, b, ...) 
assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ - !=, "ld", __VA_ARGS__) -#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ - ==, "ld", __VA_ARGS__) -#define assert_ld_lt(a, b, ...) 
assert_cmp(long, a, b, <, \ - >=, "ld", __VA_ARGS__) -#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ - >, "ld", __VA_ARGS__) -#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ - <, "ld", __VA_ARGS__) -#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ - <=, "ld", __VA_ARGS__) +#define assert_ld_eq(a, b, ...) \ + assert_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__) +#define assert_ld_ne(a, b, ...) \ + assert_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__) +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, >=, "ld", __VA_ARGS__) +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, >, "ld", __VA_ARGS__) +#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, <, "ld", __VA_ARGS__) +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ - a, b, ==, !=, "lu", __VA_ARGS__) -#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ - a, b, !=, ==, "lu", __VA_ARGS__) -#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ - a, b, <, >=, "lu", __VA_ARGS__) -#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ - a, b, <=, >, "lu", __VA_ARGS__) -#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ - a, b, >=, <, "lu", __VA_ARGS__) -#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ - a, b, >, <=, "lu", __VA_ARGS__) +#define assert_lu_eq(a, b, ...) \ + assert_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__) +#define assert_lu_ne(a, b, ...) \ + assert_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__) +#define assert_lu_lt(a, b, ...) \ + assert_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__) +#define assert_lu_le(a, b, ...) \ + assert_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__) +#define assert_lu_ge(a, b, ...) \ + assert_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__) +#define assert_lu_gt(a, b, ...) \ + assert_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, ...) 
assert_cmp(long long, a, b, ==, \ - !=, "qd", __VA_ARGS__) -#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ - ==, "qd", __VA_ARGS__) -#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ - >=, "qd", __VA_ARGS__) -#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ - >, "qd", __VA_ARGS__) -#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ - <, "qd", __VA_ARGS__) -#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ - <=, "qd", __VA_ARGS__) +#define assert_qd_eq(a, b, ...) \ + assert_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__) +#define assert_qd_ne(a, b, ...) \ + assert_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__) +#define assert_qd_lt(a, b, ...) \ + assert_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__) +#define assert_qd_le(a, b, ...) \ + assert_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__) +#define assert_qd_ge(a, b, ...) \ + assert_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__) +#define assert_qd_gt(a, b, ...) \ + assert_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ - a, b, ==, !=, "qu", __VA_ARGS__) -#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ - a, b, !=, ==, "qu", __VA_ARGS__) -#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ - a, b, <, >=, "qu", __VA_ARGS__) -#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ - a, b, <=, >, "qu", __VA_ARGS__) -#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ - a, b, >=, <, "qu", __VA_ARGS__) -#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ - a, b, >, <=, "qu", __VA_ARGS__) +#define assert_qu_eq(a, b, ...) \ + assert_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__) +#define assert_qu_ne(a, b, ...) \ + assert_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__) +#define assert_qu_lt(a, b, ...) \ + assert_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__) +#define assert_qu_le(a, b, ...) 
\ + assert_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__) +#define assert_qu_ge(a, b, ...) \ + assert_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__) +#define assert_qu_gt(a, b, ...) \ + assert_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ - !=, "jd", __VA_ARGS__) -#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ - ==, "jd", __VA_ARGS__) -#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ - >=, "jd", __VA_ARGS__) -#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ - >, "jd", __VA_ARGS__) -#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ - <, "jd", __VA_ARGS__) -#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ - <=, "jd", __VA_ARGS__) +#define assert_jd_eq(a, b, ...) \ + assert_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__) +#define assert_jd_ne(a, b, ...) \ + assert_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__) +#define assert_jd_lt(a, b, ...) \ + assert_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__) +#define assert_jd_le(a, b, ...) \ + assert_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__) +#define assert_jd_ge(a, b, ...) \ + assert_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__) +#define assert_jd_gt(a, b, ...) \ + assert_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ - !=, "ju", __VA_ARGS__) -#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ - ==, "ju", __VA_ARGS__) -#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ - >=, "ju", __VA_ARGS__) -#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ - >, "ju", __VA_ARGS__) -#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ - <, "ju", __VA_ARGS__) -#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ - <=, "ju", __VA_ARGS__) +#define assert_ju_eq(a, b, ...) 
\ + assert_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__) +#define assert_ju_ne(a, b, ...) \ + assert_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__) +#define assert_ju_lt(a, b, ...) \ + assert_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__) +#define assert_ju_le(a, b, ...) \ + assert_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__) +#define assert_ju_ge(a, b, ...) \ + assert_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__) +#define assert_ju_gt(a, b, ...) \ + assert_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ - !=, "zd", __VA_ARGS__) -#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ - ==, "zd", __VA_ARGS__) -#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ - >=, "zd", __VA_ARGS__) -#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ - >, "zd", __VA_ARGS__) -#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ - <, "zd", __VA_ARGS__) -#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ - <=, "zd", __VA_ARGS__) +#define assert_zd_eq(a, b, ...) \ + assert_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__) +#define assert_zd_ne(a, b, ...) \ + assert_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__) +#define assert_zd_lt(a, b, ...) \ + assert_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__) +#define assert_zd_le(a, b, ...) \ + assert_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__) +#define assert_zd_ge(a, b, ...) \ + assert_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__) +#define assert_zd_gt(a, b, ...) \ + assert_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ - !=, "zu", __VA_ARGS__) -#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ - ==, "zu", __VA_ARGS__) -#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ - >=, "zu", __VA_ARGS__) -#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ - >, "zu", __VA_ARGS__) -#define assert_zu_ge(a, b, ...) 
assert_cmp(size_t, a, b, >=, \ - <, "zu", __VA_ARGS__) -#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ - <=, "zu", __VA_ARGS__) +#define assert_zu_eq(a, b, ...) \ + assert_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__) +#define assert_zu_ne(a, b, ...) \ + assert_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__) +#define assert_zu_lt(a, b, ...) \ + assert_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__) +#define assert_zu_le(a, b, ...) \ + assert_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__) +#define assert_zu_ge(a, b, ...) \ + assert_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__) +#define assert_zu_gt(a, b, ...) \ + assert_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ - !=, FMTd32, __VA_ARGS__) -#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ - ==, FMTd32, __VA_ARGS__) -#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ - >=, FMTd32, __VA_ARGS__) -#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ - >, FMTd32, __VA_ARGS__) -#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ - <, FMTd32, __VA_ARGS__) -#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ - <=, FMTd32, __VA_ARGS__) +#define assert_d32_eq(a, b, ...) \ + assert_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__) +#define assert_d32_ne(a, b, ...) \ + assert_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__) +#define assert_d32_lt(a, b, ...) \ + assert_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__) +#define assert_d32_le(a, b, ...) \ + assert_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__) +#define assert_d32_ge(a, b, ...) \ + assert_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__) +#define assert_d32_gt(a, b, ...) \ + assert_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__) -#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ - !=, FMTu32, __VA_ARGS__) -#define assert_u32_ne(a, b, ...) 
assert_cmp(uint32_t, a, b, !=, \ - ==, FMTu32, __VA_ARGS__) -#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ - >=, FMTu32, __VA_ARGS__) -#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ - >, FMTu32, __VA_ARGS__) -#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ - <, FMTu32, __VA_ARGS__) -#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ - <=, FMTu32, __VA_ARGS__) +#define assert_u32_eq(a, b, ...) \ + assert_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__) +#define assert_u32_ne(a, b, ...) \ + assert_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__) +#define assert_u32_lt(a, b, ...) \ + assert_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__) +#define assert_u32_le(a, b, ...) \ + assert_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__) +#define assert_u32_ge(a, b, ...) \ + assert_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__) +#define assert_u32_gt(a, b, ...) \ + assert_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__) -#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ - !=, FMTd64, __VA_ARGS__) -#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ - ==, FMTd64, __VA_ARGS__) -#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ - >=, FMTd64, __VA_ARGS__) -#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ - >, FMTd64, __VA_ARGS__) -#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ - <, FMTd64, __VA_ARGS__) -#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ - <=, FMTd64, __VA_ARGS__) +#define assert_d64_eq(a, b, ...) \ + assert_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__) +#define assert_d64_ne(a, b, ...) \ + assert_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__) +#define assert_d64_lt(a, b, ...) \ + assert_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__) +#define assert_d64_le(a, b, ...) \ + assert_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__) +#define assert_d64_ge(a, b, ...) 
\ + assert_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__) +#define assert_d64_gt(a, b, ...) \ + assert_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__) -#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ - !=, FMTu64, __VA_ARGS__) -#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ - ==, FMTu64, __VA_ARGS__) -#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ - >=, FMTu64, __VA_ARGS__) -#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ - >, FMTu64, __VA_ARGS__) -#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ - <, FMTu64, __VA_ARGS__) -#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ - <=, FMTu64, __VA_ARGS__) +#define assert_u64_eq(a, b, ...) \ + assert_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__) +#define assert_u64_ne(a, b, ...) \ + assert_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__) +#define assert_u64_lt(a, b, ...) \ + assert_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__) +#define assert_u64_le(a, b, ...) \ + assert_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__) +#define assert_u64_ge(a, b, ...) \ + assert_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__) +#define assert_u64_gt(a, b, ...) \ + assert_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) -#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__) +#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) +#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__) -#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) -#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) #define assert_str_eq(a, b, ...) verify_str_eq(true, a, b, __VA_ARGS__) #define assert_str_ne(a, b, ...) 
verify_str_ne(true, a, b, __VA_ARGS__) @@ -515,45 +516,42 @@ typedef enum { test_status_count = 3 } test_status_t; -typedef void (test_t)(void); +typedef void(test_t)(void); -#define TEST_BEGIN(f) \ -static void \ -f(void) { \ - p_test_init(#f); +#define TEST_BEGIN(f) \ + static void f(void) { \ + p_test_init(#f); -#define TEST_END \ - goto label_test_end; \ -label_test_end: \ - p_test_fini(); \ -} +#define TEST_END \ + goto label_test_end; \ + label_test_end: \ + p_test_fini(); \ + } -#define test(...) \ - p_test(__VA_ARGS__, NULL) +#define test(...) p_test(__VA_ARGS__, NULL) -#define test_no_reentrancy(...) \ - p_test_no_reentrancy(__VA_ARGS__, NULL) +#define test_no_reentrancy(...) p_test_no_reentrancy(__VA_ARGS__, NULL) -#define test_no_malloc_init(...) \ - p_test_no_malloc_init(__VA_ARGS__, NULL) +#define test_no_malloc_init(...) p_test_no_malloc_init(__VA_ARGS__, NULL) -#define test_skip_if(e) do { \ - if (e) { \ - test_skip("%s:%s:%d: Test skipped: (%s)", \ - __func__, __FILE__, __LINE__, #e); \ - goto label_test_end; \ - } \ -} while (0) +#define test_skip_if(e) \ + do { \ + if (e) { \ + test_skip("%s:%s:%d: Test skipped: (%s)", __func__, \ + __FILE__, __LINE__, #e); \ + goto label_test_end; \ + } \ + } while (0) bool test_is_reentrant(void); -void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); -void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. 
*/ -test_status_t p_test(test_t *t, ...); -test_status_t p_test_no_reentrancy(test_t *t, ...); -test_status_t p_test_no_malloc_init(test_t *t, ...); -void p_test_init(const char *name); -void p_test_fini(void); -void p_test_fail(bool may_abort, const char *prefix, const char *message); +test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_reentrancy(test_t *t, ...); +test_status_t p_test_no_malloc_init(test_t *t, ...); +void p_test_init(const char *name); +void p_test_fini(void); +void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/include/test/timer.h b/test/include/test/timer.h index ace6191b..c1d59eb4 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -5,7 +5,7 @@ typedef struct { nstime_t t1; } timedelta_t; -void timer_start(timedelta_t *timer); -void timer_stop(timedelta_t *timer); -uint64_t timer_usec(const timedelta_t *timer); -void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen); +void timer_start(timedelta_t *timer); +void timer_stop(timedelta_t *timer); +uint64_t timer_usec(const timedelta_t *timer); +void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen); diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 440ad9ef..c81566a8 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -6,27 +6,27 @@ void * thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; - void *p; - size_t sz; + void *p; + size_t sz; sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); if (thread_ind % 4 != 3) { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); const char *dss_precs[] = {"disabled", "primary", "secondary"}; - unsigned prec_ind = thread_ind % - (sizeof(dss_precs)/sizeof(char*)); + unsigned prec_ind = 
thread_ind + % (sizeof(dss_precs) / sizeof(char *)); const char *dss = dss_precs[prec_ind]; int expected_err = (have_dss || prec_ind == 0) ? 0 : EFAULT; expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Error in mallctlnametomib()"); mib[1] = arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, - sizeof(const char *)), expected_err, - "Error in mallctlbymib()"); + sizeof(const char *)), + expected_err, "Error in mallctlbymib()"); } p = mallocx(1, MALLOCX_ARENA(arena_ind)); @@ -37,12 +37,11 @@ thd_start(void *arg) { } TEST_BEGIN(test_MALLOCX_ARENA) { - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { - thd_create(&thds[i], thd_start, - (void *)(uintptr_t)i); + thd_create(&thds[i], thd_start, (void *)(uintptr_t)i); } for (i = 0; i < NTHREADS; i++) { @@ -53,6 +52,5 @@ TEST_END int main(void) { - return test( - test_MALLOCX_ARENA); + return test(test_MALLOCX_ARENA); } diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index b37d5ba0..1cf2a2f1 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -15,7 +15,7 @@ purge(void) { TEST_BEGIN(test_alignment_errors) { size_t alignment; - void *p; + void *p; alignment = 0; set_errno(0); @@ -24,17 +24,15 @@ TEST_BEGIN(test_alignment_errors) { "Expected error for invalid alignment %zu", alignment); for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { + alignment <<= 1) { set_errno(0); p = aligned_alloc(alignment + 1, 1); expect_false(p != NULL || get_errno() != EINVAL, - "Expected error for invalid alignment %zu", - alignment + 1); + "Expected error for invalid alignment %zu", alignment + 1); } } TEST_END - /* * GCC "-Walloc-size-larger-than" warning detects when one of the memory * allocation functions is called with a size larger than the maximum size that @@ -47,33 +45,31 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_oom_errors) { size_t 
alignment, size; - void *p; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; - size = 0x80000000LU; + size = 0x80000000LU; #endif set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(%zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(%zu, %zu)", alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0xc000000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0xc0000001LU; + size = 0xc0000001LU; #endif set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(%zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(%zu, %zu)", alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -84,8 +80,7 @@ TEST_BEGIN(test_oom_errors) { set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(&p, %zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(&p, %zu, %zu)", alignment, size); } TEST_END @@ -94,21 +89,18 @@ JEMALLOC_DIAGNOSTIC_POP TEST_BEGIN(test_alignment_and_size) { #define NITER 4 - size_t alignment, size, total; + size_t alignment, size, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (size = 1; - size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (size = 1; size < 3 * alignment && size < (1U << 31); + size += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, 
size); if (ps[i] == NULL) { @@ -149,9 +141,6 @@ TEST_END int main(void) { - return test( - test_alignment_errors, - test_oom_errors, - test_alignment_and_size, - test_zero_alloc); + return test(test_alignment_errors, test_oom_errors, + test_alignment_and_size, test_zero_alloc); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 967e0108..2c46d916 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -2,27 +2,27 @@ void * thd_start(void *arg) { - int err; - void *p; - uint64_t a0, a1, d0, d1; + int err; + void *p; + uint64_t a0, a1, d0, d1; uint64_t *ap0, *ap1, *dp0, *dp1; - size_t sz, usize; + size_t sz, usize; sz = sizeof(a0); if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } expect_u64_eq(*ap0, a0, "\"thread.allocatedp\" should provide a pointer to internal " @@ -33,17 +33,17 @@ thd_start(void *arg) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, - 0))) { + if ((err = mallctl( + "thread.deallocatedp", (void *)&dp0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } expect_u64_eq(*dp0, d0, "\"thread.deallocatedp\" should provide a pointer to internal " 
@@ -107,10 +107,6 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad interactions. */ - return test( - test_main_thread, - test_subthread, - test_main_thread, - test_subthread, - test_main_thread); + return test(test_main_thread, test_subthread, test_main_thread, + test_subthread, test_main_thread); } diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index c1cf6cd8..e0341176 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -19,6 +19,5 @@ TEST_END int main() { - return test( - test_basic); + return test(test_basic); } diff --git a/test/integration/cpp/infallible_new_false.cpp b/test/integration/cpp/infallible_new_false.cpp index 42196d6a..5ba4f49e 100644 --- a/test/integration/cpp/infallible_new_false.cpp +++ b/test/integration/cpp/infallible_new_false.cpp @@ -17,7 +17,5 @@ TEST_END int main(void) { - return test( - test_failing_alloc); + return test(test_failing_alloc); } - diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 3b2862bd..300bdd85 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -8,7 +8,8 @@ */ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { const char *expected_start = ": Allocation of size"; if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); @@ -19,7 +20,7 @@ void fake_abort(const char *message) { static bool own_operator_new(void) { uint64_t before, after; - size_t sz = sizeof(before); + size_t sz = sizeof(before); /* thread.allocated is always available, even w/o config_stats. 
*/ expect_d_eq(mallctl("thread.allocated", (void *)&before, &sz, NULL, 0), @@ -35,8 +36,8 @@ own_operator_new(void) { TEST_BEGIN(test_failing_alloc) { abort_hook_t abort_hook = &fake_abort; expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL, NULL, - (void *)&abort_hook, sizeof(abort_hook)), 0, - "Unexpected mallctl failure setting abort hook"); + (void *)&abort_hook, sizeof(abort_hook)), + 0, "Unexpected mallctl failure setting abort hook"); /* * Not owning operator new is only expected to happen on MinGW which @@ -61,6 +62,5 @@ TEST_END int main(void) { - return test( - test_failing_alloc); + return test(test_failing_alloc); } diff --git a/test/integration/extent.c b/test/integration/extent.c index 7a028f18..c15bf761 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -6,26 +6,29 @@ static void test_extent_body(unsigned arena_ind) { - void *p; + void *p; size_t large0, large1, large2, sz; size_t purge_mib[3]; size_t purge_miblen; - int flags; - bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + int flags; + bool xallocx_success_a, xallocx_success_b, xallocx_success_c; flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Get large size classes. 
*/ sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.0.size failure"); - expect_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.1.size failure"); - expect_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.2.size failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.0.size failure"); + expect_d_eq( + mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.1.size failure"); + expect_d_eq( + mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. */ - purge_miblen = sizeof(purge_mib)/sizeof(size_t); + purge_miblen = sizeof(purge_mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; @@ -47,8 +50,8 @@ test_extent_body(unsigned arena_ind) { if (xallocx_success_a) { expect_true(called_dalloc, "Expected dalloc call"); expect_true(called_decommit, "Expected decommit call"); - expect_true(did_purge_lazy || did_purge_forced, - "Expected purge"); + expect_true( + did_purge_lazy || did_purge_forced, "Expected purge"); expect_true(called_split, "Expected split call"); } dallocx(p, flags); @@ -72,8 +75,8 @@ test_extent_body(unsigned arena_ind) { } xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); if (did_split) { - expect_b_eq(did_decommit, did_commit, - "Expected decommit/commit match"); + expect_b_eq( + did_decommit, did_commit, "Expected decommit/commit match"); } if (xallocx_success_b && xallocx_success_c) { expect_true(did_merge, "Expected merge"); @@ -90,33 +93,34 @@ test_extent_body(unsigned arena_ind) { static void 
test_manual_hook_auto_arena(void) { - unsigned narenas; - size_t old_size, new_size, sz; - size_t hooks_mib[3]; - size_t hooks_miblen; + unsigned narenas; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; extent_hooks_t *new_hooks, *old_hooks; extent_hooks_prep(); sz = sizeof(unsigned); /* Get number of auto arenas. */ - expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); if (narenas == 1) { return; } /* Install custom extent hooks on arena 1 (might not be initialized). */ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = 1; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected extent_hooks error"); + &old_size, (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks error"); static bool auto_arena_created = false; if (old_hooks != &hooks) { expect_b_eq(auto_arena_created, false, @@ -127,10 +131,10 @@ test_manual_hook_auto_arena(void) { static void test_manual_hook_body(void) { - unsigned arena_ind; - size_t old_size, new_size, sz; - size_t hooks_mib[3]; - size_t hooks_miblen; + unsigned arena_ind; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; extent_hooks_t *new_hooks, *old_hooks; extent_hooks_prep(); @@ -140,16 +144,17 @@ test_manual_hook_body(void) { 0, "Unexpected mallctl() failure"); /* Install custom extent hooks. 
*/ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected extent_hooks error"); + &old_size, (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks error"); expect_ptr_ne(old_hooks->alloc, extent_alloc_hook, "Unexpected extent_hooks error"); expect_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, @@ -173,10 +178,13 @@ test_manual_hook_body(void) { /* Restore extent hooks. */ expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); + (void *)&old_hooks, new_size), + 0, "Unexpected extent_hooks error"); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - expect_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); + &old_size, NULL, 0), + 0, "Unexpected extent_hooks error"); + expect_ptr_eq( + old_hooks, default_hooks, "Unexpected extent_hooks error"); expect_ptr_eq(old_hooks->alloc, default_hooks->alloc, "Unexpected extent_hooks error"); expect_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, @@ -213,8 +221,8 @@ TEST_BEGIN(test_extent_manual_hook) { TEST_END TEST_BEGIN(test_extent_auto_hook) { - unsigned arena_ind; - size_t new_size, sz; + unsigned arena_ind; + size_t new_size, sz; extent_hooks_t *new_hooks; extent_hooks_prep(); @@ -223,7 +231,8 @@ TEST_BEGIN(test_extent_auto_hook) { new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); 
expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); + (void *)&new_hooks, new_size), + 0, "Unexpected mallctl() failure"); test_skip_if(is_background_thread_enabled()); test_extent_body(arena_ind); @@ -231,19 +240,18 @@ TEST_BEGIN(test_extent_auto_hook) { TEST_END static void -test_arenas_create_ext_base(arena_config_t config, - bool expect_hook_data, bool expect_hook_metadata) -{ +test_arenas_create_ext_base( + arena_config_t config, bool expect_hook_data, bool expect_hook_metadata) { unsigned arena, arena1; - void *ptr; - size_t sz = sizeof(unsigned); + void *ptr; + size_t sz = sizeof(unsigned); extent_hooks_prep(); called_alloc = false; - expect_d_eq(mallctl("experimental.arenas_create_ext", - (void *)&arena, &sz, &config, sizeof(arena_config_t)), 0, - "Unexpected mallctl() failure"); + expect_d_eq(mallctl("experimental.arenas_create_ext", (void *)&arena, + &sz, &config, sizeof(arena_config_t)), + 0, "Unexpected mallctl() failure"); expect_b_eq(called_alloc, expect_hook_metadata, "expected hook metadata alloc mismatch"); @@ -279,9 +287,7 @@ TEST_END int main(void) { - return test( - test_extent_manual_hook, - test_extent_auto_hook, + return test(test_extent_manual_hook, test_extent_auto_hook, test_arenas_create_ext_with_ehooks_no_metadata, test_arenas_create_ext_with_ehooks_with_metadata); } diff --git a/test/integration/malloc.c b/test/integration/malloc.c index ef449163..a77e44a6 100644 --- a/test/integration/malloc.c +++ b/test/integration/malloc.c @@ -11,6 +11,5 @@ TEST_END int main(void) { - return test( - test_zero_alloc); + return test(test_zero_alloc); } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index fdf1e3f4..c7ed0fb9 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -3,7 +3,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void 
*)&ret, &z, NULL, 0), 0, @@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -64,36 +64,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); - expect_ptr_null(mallocx(largemax+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", largemax+1); + expect_ptr_null(mallocx(largemax + 1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", largemax + 1); - expect_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(mallocx(ZU(PTRDIFF_MAX) + 1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); } TEST_END static void * remote_alloc(void *arg) { unsigned arena; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, - 
NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); - void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena) - | MALLOCX_TCACHE_NONE); + void *ptr = mallocx( + large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void **ret = (void **)arg; *ret = ptr; @@ -114,16 +115,16 @@ TEST_BEGIN(test_remote_free) { TEST_END TEST_BEGIN(test_oom) { - size_t largemax; - bool oom; - void *ptrs[3]; + size_t largemax; + bool oom; + void *ptrs[3]; unsigned i; /* * It should be impossible to allocate three objects that each consume * nearly half the virtual address space. */ - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { ptrs[i] = mallocx(largemax, MALLOCX_ARENA(0)); @@ -143,10 +144,10 @@ TEST_BEGIN(test_oom) { #if LG_SIZEOF_PTR == 3 expect_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x8000000000000000ULL)), + MALLOCX_ALIGN(0x8000000000000000ULL)), "Expected OOM for mallocx()"); - expect_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x80000000)), + expect_ptr_null( + mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)), "Expected OOM for mallocx()"); #else expect_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), @@ -164,20 +165,20 @@ TEST_BEGIN(test_basic) { for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { size_t nsz, rsz; - void *p; + void *p; nsz = nallocx(sz, 0); expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - expect_ptr_not_null(p, - "Unexpected mallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); expect_zu_ge(rsz, sz, "Real size smaller than expected"); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - expect_ptr_not_null(p, - "Unexpected mallocx(size=%zx, 
flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); @@ -197,53 +198,57 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { const char *percpu_arena; - size_t sz = sizeof(percpu_arena); + size_t sz = sizeof(percpu_arena); - if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || - strcmp(percpu_arena, "disabled") != 0) { - test_skip("test_alignment_and_size skipped: " + if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) + || strcmp(percpu_arena, "disabled") != 0) { + test_skip( + "test_alignment_and_size skipped: " "not working with percpu arena."); }; #define MAXALIGN (((size_t)1) << 23) #define NITER 4 - size_t nsz, rsz, alignment, total; + size_t nsz, rsz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO | MALLOCX_ARENA(0)); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO + | MALLOCX_ARENA(0)); expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); - ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO | MALLOCX_ARENA(0)); + "size=%zu (%#zx)", + alignment, sz, sz); + ps[i] = mallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO + | MALLOCX_ARENA(0)); expect_ptr_not_null(ps[i], "mallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); + "size=%zu (%#zx)", + alignment, sz, sz); rsz = sallocx(ps[i], 0); expect_zu_ge(rsz, sz, "Real size smaller than 
expected for " - "alignment=%zu, size=%zu", alignment, sz); + "alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); - expect_ptr_null( - (void *)((uintptr_t)ps[i] & (alignment-1)), - "%p inadequately aligned for" - " alignment=%zu, size=%zu", ps[i], + "alignment=%zu, size=%zu", alignment, sz); + expect_ptr_null((void *)((uintptr_t)ps[i] + & (alignment - 1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", + ps[i], alignment, sz); total += rsz; if (total >= (MAXALIGN << 1)) { break; @@ -265,10 +270,6 @@ TEST_END int main(void) { - return test( - test_overflow, - test_oom, - test_remote_free, - test_basic, + return test(test_overflow, test_oom, test_remote_free, test_basic, test_alignment_and_size); } diff --git a/test/integration/overflow.c b/test/integration/overflow.c index ce63327c..17282e84 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -12,13 +12,14 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { unsigned nlextents; - size_t mib[4]; - size_t sz, miblen, max_size_class; - void *p; + size_t mib[4]; + size_t sz, miblen, max_size_class; + void *p; sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -26,8 +27,9 @@ TEST_BEGIN(test_overflow) { mib[2] = nlextents - 1; sz = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); expect_ptr_null(malloc(max_size_class + 1), "Expected OOM 
due to over-sized allocation request"); @@ -54,6 +56,5 @@ JEMALLOC_DIAGNOSTIC_POP int main(void) { - return test( - test_overflow); + return test(test_overflow); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 2da0549b..e0df56f3 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -15,48 +15,44 @@ purge(void) { TEST_BEGIN(test_alignment_errors) { size_t alignment; - void *p; + void *p; for (alignment = 0; alignment < sizeof(void *); alignment++) { expect_d_eq(posix_memalign(&p, alignment, 1), EINVAL, - "Expected error for invalid alignment %zu", - alignment); + "Expected error for invalid alignment %zu", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { + alignment <<= 1) { expect_d_ne(posix_memalign(&p, alignment + 1, 1), 0, - "Expected error for invalid alignment %zu", - alignment + 1); + "Expected error for invalid alignment %zu", alignment + 1); } } TEST_END TEST_BEGIN(test_oom_errors) { size_t alignment, size; - void *p; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; - size = 0x80000000LU; + size = 0x80000000LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0xc000000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0xc0000001LU; + size = 0xc0000001LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -65,33 +61,29 @@ 
TEST_BEGIN(test_oom_errors) { size = 0xfffffff0LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); } TEST_END TEST_BEGIN(test_alignment_and_size) { #define NITER 4 - size_t alignment, size, total; + size_t alignment, size, total; unsigned i; - int err; - void *ps[NITER]; + int err; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (size = 0; - size < 3 * alignment && size < (1U << 31); - size += ((size == 0) ? 1 : - (alignment >> (LG_SIZEOF_PTR-1)) - 1)) { + for (size = 0; size < 3 * alignment && size < (1U << 31); + size += ((size == 0) + ? 1 + : (alignment >> (LG_SIZEOF_PTR - 1)) - 1)) { for (i = 0; i < NITER; i++) { - err = posix_memalign(&ps[i], - alignment, size); + err = posix_memalign(&ps[i], alignment, size); if (err) { char buf[BUFERROR_BUF]; @@ -122,7 +114,5 @@ TEST_END int main(void) { return test( - test_alignment_errors, - test_oom_errors, - test_alignment_and_size); + test_alignment_errors, test_oom_errors, test_alignment_and_size); } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 85d9238b..8e822df7 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -3,7 +3,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -58,25 +58,26 @@ TEST_BEGIN(test_grow_and_shrink) { szs[0] = sallocx(p, 0); for (i = 0; i < NCYCLES; i++) { - for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { - q = rallocx(p, szs[j-1]+1, 0); + for (j = 1; j < NSZS && szs[j - 1] < MAXSZ; j++) { + q = rallocx(p, szs[j - 1] + 1, 0); expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", - szs[j-1], szs[j-1]+1); + szs[j - 1], szs[j - 1] + 1); szs[j] = sallocx(q, 0); - expect_zu_ne(szs[j], szs[j-1]+1, - "Expected size to be at least: %zu", szs[j-1]+1); + expect_zu_ne(szs[j], szs[j - 1] + 1, + "Expected size to be at least: %zu", + szs[j - 1] + 1); p = q; } for (j--; j > 0; j--) { - q = rallocx(p, szs[j-1], 0); + q = rallocx(p, szs[j - 1], 0); expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", - szs[j], szs[j-1]); + szs[j], szs[j - 1]); tsz = sallocx(q, 0); - expect_zu_eq(tsz, szs[j-1], - "Expected size=%zu, got size=%zu", szs[j-1], tsz); + expect_zu_eq(tsz, szs[j - 1], + "Expected size=%zu, got size=%zu", szs[j - 1], tsz); p = q; } } @@ -99,11 +100,12 @@ validate_fill(void *p, uint8_t c, size_t offset, size_t len) { size_t i; for (i = 0; i < len; i++) { - uint8_t b = buf[offset+i]; + uint8_t b = buf[offset + i]; if (b != c) { - test_fail("Allocation at %p (len=%zu) contains %#x " - "rather than %#x at offset %zu", p, len, b, c, - offset+i); + test_fail( + "Allocation at %p (len=%zu) contains %#x " + "rather than %#x at offset %zu", + p, len, b, c, offset + i); ret = true; } } @@ -118,35 +120,37 @@ TEST_BEGIN(test_zero) { */ void *volatile p, *volatile q; size_t psz, qsz, i, j; - size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; + size_t 
start_sizes[] = {1, 3 * 1024, 63 * 1024, 4095 * 1024}; #define FILL_BYTE 0xaaU #define RANGE 2048 - for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { + for (i = 0; i < sizeof(start_sizes) / sizeof(size_t); i++) { size_t start_size = start_sizes[i]; p = mallocx(start_size, MALLOCX_ZERO); expect_ptr_not_null(p, "Unexpected mallocx() error"); psz = sallocx(p, 0); - expect_false(validate_fill(p, 0, 0, psz), - "Expected zeroed memory"); + expect_false( + validate_fill(p, 0, 0, psz), "Expected zeroed memory"); memset(p, FILL_BYTE, psz); expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); for (j = 1; j < RANGE; j++) { - q = rallocx(p, start_size+j, MALLOCX_ZERO); + q = rallocx(p, start_size + j, MALLOCX_ZERO); expect_ptr_not_null(q, "Unexpected rallocx() error"); qsz = sallocx(q, 0); if (q != p || qsz != psz) { - expect_false(validate_fill(q, FILL_BYTE, 0, - psz), "Expected filled memory"); - expect_false(validate_fill(q, 0, psz, qsz-psz), + expect_false( + validate_fill(q, FILL_BYTE, 0, psz), + "Expected filled memory"); + expect_false( + validate_fill(q, 0, psz, qsz - psz), "Expected zeroed memory"); } if (psz != qsz) { - memset((void *)((uintptr_t)q+psz), FILL_BYTE, - qsz-psz); + memset((void *)((uintptr_t)q + psz), FILL_BYTE, + qsz - psz); psz = qsz; } p = q; @@ -160,7 +164,7 @@ TEST_BEGIN(test_zero) { TEST_END TEST_BEGIN(test_align) { - void *p, *q; + void *p, *q; size_t align; #define MAX_ALIGN (ZU(1) << 25) @@ -170,12 +174,10 @@ TEST_BEGIN(test_align) { for (align <<= 1; align <= MAX_ALIGN; align <<= 1) { q = rallocx(p, 1, MALLOCX_ALIGN(align)); - expect_ptr_not_null(q, - "Unexpected rallocx() error for align=%zu", align); - expect_ptr_null( - (void *)((uintptr_t)q & (align-1)), - "%p inadequately aligned for align=%zu", - q, align); + expect_ptr_not_null( + q, "Unexpected rallocx() error for align=%zu", align); + expect_ptr_null((void *)((uintptr_t)q & (align - 1)), + "%p inadequately aligned for align=%zu", q, align); p = q; } 
dallocx(p, 0); @@ -191,19 +193,19 @@ TEST_BEGIN(test_align_enum) { for (size_t lg_size = LG_MIN; lg_size <= LG_MAX; ++lg_size) { size_t size = 1 << lg_size; for (size_t lg_align_next = LG_MIN; - lg_align_next <= LG_MAX; ++lg_align_next) { - int flags = MALLOCX_LG_ALIGN(lg_align); + lg_align_next <= LG_MAX; ++lg_align_next) { + int flags = MALLOCX_LG_ALIGN(lg_align); void *p = mallocx(1, flags); - assert_ptr_not_null(p, - "Unexpected mallocx() error"); + assert_ptr_not_null( + p, "Unexpected mallocx() error"); assert_zu_eq(nallocx(1, flags), TEST_MALLOC_SIZE(p), "Wrong mallocx() usable size"); - int flags_next = - MALLOCX_LG_ALIGN(lg_align_next); + int flags_next = MALLOCX_LG_ALIGN( + lg_align_next); p = rallocx(p, size, flags_next); - assert_ptr_not_null(p, - "Unexpected rallocx() error"); + assert_ptr_not_null( + p, "Unexpected rallocx() error"); expect_zu_eq(nallocx(size, flags_next), TEST_MALLOC_SIZE(p), "Wrong rallocx() usable size"); @@ -223,20 +225,20 @@ TEST_BEGIN(test_lg_align_and_zero) { */ void *volatile p, *volatile q; unsigned lg_align; - size_t sz; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) lg_align = 0; - p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); + p = mallocx(1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO); expect_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { - q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - expect_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%u", lg_align); + q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO); + expect_ptr_not_null( + q, "Unexpected rallocx() error for lg_align=%u", lg_align); expect_ptr_null( - (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), + (void *)((uintptr_t)q & ((ZU(1) << lg_align) - 1)), "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { @@ -245,9 +247,10 @@ TEST_BEGIN(test_lg_align_and_zero) { } else { 
expect_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); - expect_false(validate_fill( - (void *)((uintptr_t)q+sz-MAX_VALIDATE), - 0, 0, MAX_VALIDATE), "Expected zeroed memory"); + expect_false(validate_fill((void *)((uintptr_t)q + sz + - MAX_VALIDATE), + 0, 0, MAX_VALIDATE), + "Expected zeroed memory"); } p = q; } @@ -269,25 +272,25 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - void *p; + void *p; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - expect_ptr_null(rallocx(p, largemax+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1); + expect_ptr_null(rallocx(p, largemax + 1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", largemax + 1); - expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX) + 1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); dallocx(p, 0); } @@ -298,11 +301,6 @@ JEMALLOC_DIAGNOSTIC_POP int main(void) { - return test( - test_grow_and_shrink, - test_zero, - test_align, - test_align_enum, - test_lg_align_and_zero, - test_overflow); + return test(test_grow_and_shrink, test_zero, test_align, + test_align_enum, test_lg_align_and_zero, test_overflow); } diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index ca014485..ec2fb938 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -10,26 +10,23 @@ TEST_BEGIN(test_basic) { TEST_END 
TEST_BEGIN(test_alignment_and_size) { - size_t nsz, sz, alignment, total; + size_t nsz, sz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); - ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); + ps[i] = mallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); total += nsz; if (total >= (MAXALIGN << 1)) { break; @@ -49,7 +46,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_basic, - test_alignment_and_size); + return test_no_reentrancy(test_basic, test_alignment_and_size); } diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c index f6a66f21..f1ff67aa 100644 --- a/test/integration/slab_sizes.c +++ b/test/integration/slab_sizes.c @@ -4,10 +4,10 @@ TEST_BEGIN(test_slab_sizes) { unsigned nbins; - size_t page; - size_t sizemib[4]; - size_t slabmib[4]; - size_t len; + size_t page; + size_t sizemib[4]; + size_t slabmib[4]; + size_t len; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, @@ -33,12 +33,14 @@ TEST_BEGIN(test_slab_sizes) { len = sizeof(size_t); sizemib[2] = i; slabmib[2] = i; - expect_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, - NULL, 0), 0, "bin size mallctlbymib failure"); + expect_d_eq( + mallctlbymib(sizemib, 4, (void *)&bin_size, &len, NULL, 0), + 0, "bin size mallctlbymib failure"); len = sizeof(size_t); - expect_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, - NULL, 0), 0, "slab size 
mallctlbymib failure"); + expect_d_eq( + mallctlbymib(slabmib, 4, (void *)&slab_size, &len, NULL, 0), + 0, "slab size mallctlbymib failure"); if (bin_size < 100) { /* @@ -51,8 +53,7 @@ TEST_BEGIN(test_slab_sizes) { expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; - } else if ( - (100 <= bin_size && bin_size < 128) + } else if ((100 <= bin_size && bin_size < 128) || (128 < bin_size && bin_size <= 200)) { expect_zu_eq(slab_size, page, "Forced-small slabs should be small"); @@ -75,6 +76,5 @@ TEST_END int main(void) { - return test( - test_slab_sizes); + return test(test_slab_sizes); } diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index 389319b7..186a6492 100644 --- a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -5,25 +5,24 @@ #define STR(x) STR_HELPER(x) #ifndef JEMALLOC_VERSION_GID_IDENT - #error "JEMALLOC_VERSION_GID_IDENT not defined" +# error "JEMALLOC_VERSION_GID_IDENT not defined" #endif -#define JOIN(x, y) x ## y +#define JOIN(x, y) x##y #define JOIN2(x, y) JOIN(x, y) #define smallocx JOIN2(smallocx_, JEMALLOC_VERSION_GID_IDENT) typedef struct { - void *ptr; + void *ptr; size_t size; } smallocx_return_t; -extern smallocx_return_t -smallocx(size_t size, int flags); +extern smallocx_return_t smallocx(size_t size, int flags); static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -45,12 +44,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -84,36 +83,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); - expect_ptr_null(smallocx(largemax+1, 0).ptr, - "Expected OOM for smallocx(size=%#zx, 0)", largemax+1); + expect_ptr_null(smallocx(largemax + 1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", largemax + 1); - expect_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, - "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(smallocx(ZU(PTRDIFF_MAX) + 1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, + expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)).ptr, "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); } TEST_END static void * remote_alloc(void *arg) { unsigned arena; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); - smallocx_return_t r - = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); + smallocx_return_t r = smallocx( + large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void *ptr = r.ptr; expect_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE), @@ -138,16 +138,16 @@ 
TEST_BEGIN(test_remote_free) { TEST_END TEST_BEGIN(test_oom) { - size_t largemax; - bool oom; - void *ptrs[3]; + size_t largemax; + bool oom; + void *ptrs[3]; unsigned i; /* * It should be impossible to allocate three objects that each consume * nearly half the virtual address space. */ - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { ptrs[i] = smallocx(largemax, 0).ptr; @@ -167,10 +167,11 @@ TEST_BEGIN(test_oom) { #if LG_SIZEOF_PTR == 3 expect_ptr_null(smallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x8000000000000000ULL)).ptr, + MALLOCX_ALIGN(0x8000000000000000ULL)) + .ptr, "Expected OOM for smallocx()"); - expect_ptr_null(smallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x80000000)).ptr, + expect_ptr_null( + smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)).ptr, "Expected OOM for smallocx()"); #else expect_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, @@ -188,15 +189,15 @@ TEST_BEGIN(test_basic) { for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { smallocx_return_t ret; - size_t nsz, rsz, smz; - void *p; + size_t nsz, rsz, smz; + void *p; nsz = nallocx(sz, 0); expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - expect_ptr_not_null(p, - "Unexpected smallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected smallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); expect_zu_ge(rsz, sz, "Real size smaller than expected"); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); @@ -206,8 +207,8 @@ TEST_BEGIN(test_basic) { ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - expect_ptr_not_null(p, - "Unexpected smallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected smallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); @@ -230,58 +231,61 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { 
const char *percpu_arena; - size_t sz = sizeof(percpu_arena); + size_t sz = sizeof(percpu_arena); - if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || - strcmp(percpu_arena, "disabled") != 0) { - test_skip("test_alignment_and_size skipped: " + if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) + || strcmp(percpu_arena, "disabled") != 0) { + test_skip( + "test_alignment_and_size skipped: " "not working with percpu arena."); }; #define MAXALIGN (((size_t)1) << 23) #define NITER 4 - size_t nsz, rsz, smz, alignment, total; + size_t nsz, rsz, smz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); - smallocx_return_t ret - = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); + "size=%zu (%#zx)", + alignment, sz, sz); + smallocx_return_t ret = smallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); ps[i] = ret.ptr; expect_ptr_not_null(ps[i], "smallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); + "size=%zu (%#zx)", + alignment, sz, sz); rsz = sallocx(ps[i], 0); smz = ret.size; expect_zu_ge(rsz, sz, "Real size smaller than expected for " - "alignment=%zu, size=%zu", alignment, sz); + "alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); + 
"alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); - expect_ptr_null( - (void *)((uintptr_t)ps[i] & (alignment-1)), - "%p inadequately aligned for" - " alignment=%zu, size=%zu", ps[i], + "alignment=%zu, size=%zu", alignment, sz); + expect_ptr_null((void *)((uintptr_t)ps[i] + & (alignment - 1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", + ps[i], alignment, sz); total += rsz; if (total >= (MAXALIGN << 1)) { break; @@ -303,10 +307,6 @@ TEST_END int main(void) { - return test( - test_overflow, - test_oom, - test_remote_free, - test_basic, + return test(test_overflow, test_oom, test_remote_free, test_basic, test_alignment_and_size); } diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 4a6abf64..48062183 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -5,10 +5,10 @@ void * thd_start(void *arg) { unsigned main_arena_ind = *(unsigned *)arg; - void *p; + void *p; unsigned arena_ind; - size_t size; - int err; + size_t size; + int err; p = malloc(1); expect_ptr_not_null(p, "Error in malloc()"); @@ -16,7 +16,7 @@ thd_start(void *arg) { size = sizeof(arena_ind); if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, - (void *)&main_arena_ind, sizeof(main_arena_ind)))) { + (void *)&main_arena_ind, sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -24,8 +24,8 @@ thd_start(void *arg) { } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, - 0))) { + if ((err = mallctl( + "thread.arena", (void *)&arena_ind, &size, NULL, 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -46,28 +46,28 @@ mallctl_failure(int err) { } TEST_BEGIN(test_thread_arena) { - void *p; - int err; - thd_t thds[NTHREADS]; + void *p; + int err; + thd_t thds[NTHREADS]; unsigned i; p = malloc(1); expect_ptr_not_null(p, 
"Error in malloc()"); unsigned arena_ind, old_arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); size_t size = sizeof(arena_ind); if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size, - (void *)&arena_ind, sizeof(arena_ind))) != 0) { + (void *)&arena_ind, sizeof(arena_ind))) + != 0) { mallctl_failure(err); } for (i = 0; i < NTHREADS; i++) { - thd_create(&thds[i], thd_start, - (void *)&arena_ind); + thd_create(&thds[i], thd_start, (void *)&arena_ind); } for (i = 0; i < NTHREADS; i++) { @@ -81,6 +81,5 @@ TEST_END int main(void) { - return test( - test_thread_arena); + return test(test_thread_arena); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index d44dbe90..3c7c95f6 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -2,60 +2,69 @@ void * thd_start(void *arg) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); if (e0) { e1 = false; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); } e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + 
mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be 
disabled"); free(malloc(1)); @@ -78,10 +87,6 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad interactions. */ - return test( - test_main_thread, - test_subthread, - test_main_thread, - test_subthread, - test_main_thread); + return test(test_main_thread, test_subthread, test_main_thread, + test_subthread, test_main_thread); } diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 13708548..9b5ebcde 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -11,15 +11,16 @@ arena_ind(void) { if (ind == 0) { size_t sz = sizeof(ind); - expect_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, - 0), 0, "Unexpected mallctl failure creating arena"); + expect_d_eq( + mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0, + "Unexpected mallctl failure creating arena"); } return ind; } TEST_BEGIN(test_same_size) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); @@ -34,14 +35,14 @@ TEST_BEGIN(test_same_size) { TEST_END TEST_BEGIN(test_extra_no_move) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); - tsz = xallocx(p, sz, sz-42, 0); + tsz = xallocx(p, sz, sz - 42, 0); expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); @@ -49,7 +50,7 @@ TEST_BEGIN(test_extra_no_move) { TEST_END TEST_BEGIN(test_no_move_fail) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); @@ -66,7 +67,7 @@ TEST_END static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -93,12 +94,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -115,25 +116,25 @@ get_large_size(size_t ind) { TEST_BEGIN(test_size) { size_t small0, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test smallest supported size. */ - expect_zu_eq(xallocx(p, 1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, 1, 0, 0), small0, "Unexpected xallocx() behavior"); /* Test largest supported size. */ expect_zu_le(xallocx(p, largemax, 0, 0), largemax, "Unexpected xallocx() behavior"); /* Test size overflow. */ - expect_zu_le(xallocx(p, largemax+1, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax + 1, 0, 0), largemax, "Unexpected xallocx() behavior"); expect_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, "Unexpected xallocx() behavior"); @@ -144,29 +145,29 @@ TEST_END TEST_BEGIN(test_size_extra_overflow) { size_t small0, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test overflows that can be resolved by clamping extra. */ - expect_zu_le(xallocx(p, largemax-1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax - 1, 2, 0), largemax, "Unexpected xallocx() behavior"); expect_zu_le(xallocx(p, largemax, 1, 0), largemax, "Unexpected xallocx() behavior"); /* Test overflow such that largemax-size underflows. 
*/ - expect_zu_le(xallocx(p, largemax+1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax + 1, 2, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, largemax+2, 3, 0), largemax, + expect_zu_le(xallocx(p, largemax + 2, 3, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX - 2, 2, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX - 1, 1, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -175,21 +176,21 @@ TEST_END TEST_BEGIN(test_extra_small) { size_t small0, small1, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); small1 = get_small_size(1); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); - expect_zu_eq(xallocx(p, small1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); - expect_zu_eq(xallocx(p, small1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); expect_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, "Unexpected xallocx() behavior"); @@ -205,16 +206,16 @@ TEST_BEGIN(test_extra_small) { TEST_END TEST_BEGIN(test_extra_large) { - int flags = MALLOCX_ARENA(arena_ind()); + int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large1, large2, large3, largemax; - void *p; + void *p; /* Get size classes. 
*/ - smallmax = get_small_size(get_nsmall()-1); + smallmax = get_small_size(get_nsmall() - 1); large1 = get_large_size(1); large2 = get_large_size(2); large3 = get_large_size(3); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(large3, flags); expect_ptr_not_null(p, "Unexpected mallocx() error"); @@ -246,7 +247,7 @@ TEST_BEGIN(test_extra_large) { /* Test size increase with zero extra. */ expect_zu_le(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, largemax+1, 0, flags), large3, + expect_zu_le(xallocx(p, largemax + 1, 0, flags), large3, "Unexpected xallocx() behavior"); expect_zu_ge(xallocx(p, large1, 0, flags), large1, @@ -276,8 +277,8 @@ TEST_END static void print_filled_extents(const void *p, uint8_t c, size_t len) { const uint8_t *pc = (const uint8_t *)p; - size_t i, range0; - uint8_t c0; + size_t i, range0; + uint8_t c0; malloc_printf(" p=%p, c=%#x, len=%zu:", p, c, len); range0 = 0; @@ -295,10 +296,10 @@ print_filled_extents(const void *p, uint8_t c, size_t len) { static bool validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { const uint8_t *pc = (const uint8_t *)p; - bool err; - size_t i; + bool err; + size_t i; - for (i = offset, err = false; i < offset+len; i++) { + for (i = offset, err = false; i < offset + len; i++) { if (pc[i] != c) { err = true; } @@ -313,16 +314,16 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { static void test_zero(size_t szmin, size_t szmax) { - int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; + int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; - void *p; + void *p; #define FILL_BYTE 0x7aU sz = szmax; p = mallocx(sz, flags); expect_ptr_not_null(p, "Unexpected mallocx() error"); - expect_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", - sz); + expect_false( + validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); /* * Fill with non-zero 
so that non-debug builds are more likely to detect @@ -342,16 +343,16 @@ test_zero(size_t szmin, size_t szmax) { "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { - nsz = nallocx(sz+1, flags); - if (xallocx(p, sz+1, 0, flags) != nsz) { - p = rallocx(p, sz+1, flags); + nsz = nallocx(sz + 1, flags); + if (xallocx(p, sz + 1, 0, flags) != nsz) { + p = rallocx(p, sz + 1, flags); expect_ptr_not_null(p, "Unexpected rallocx() failure"); } expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); - expect_false(validate_fill(p, 0x00, sz, nsz-sz), - "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz); - memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz); + expect_false(validate_fill(p, 0x00, sz, nsz - sz), + "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz - sz); + memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz - sz); expect_false(validate_fill(p, FILL_BYTE, 0, nsz), "Memory not filled: nsz=%zu", nsz); } @@ -372,13 +373,7 @@ TEST_END int main(void) { - return test( - test_same_size, - test_extra_no_move, - test_no_move_fail, - test_size, - test_size_extra_overflow, - test_extra_small, - test_extra_large, - test_zero_large); + return test(test_same_size, test_extra_no_move, test_no_move_fail, + test_size, test_size_extra_overflow, test_extra_small, + test_extra_large, test_zero_large); } diff --git a/test/src/SFMT.c b/test/src/SFMT.c index c05e2183..87b1fd1c 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -50,19 +50,19 @@ #include "test/SFMT-params.h" #if defined(JEMALLOC_BIG_ENDIAN) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(ONLY64) && !defined(BIG_ENDIAN64) - #if defined(__GNUC__) - #error "-DONLY64 must be specified with 
-DBIG_ENDIAN64" - #endif -#undef ONLY64 +# if defined(__GNUC__) +# error "-DONLY64 must be specified with -DBIG_ENDIAN64" +# endif +# undef ONLY64 #endif /*------------------------------------------------------ 128-bit SIMD data type for Altivec, SSE2 or standard C @@ -70,8 +70,8 @@ #if defined(HAVE_ALTIVEC) /** 128-bit data structure */ union W128_T { - vector unsigned int s; - uint32_t u[4]; + vector unsigned int s; + uint32_t u[4]; }; /** 128-bit data type */ typedef union W128_T w128_t; @@ -79,8 +79,8 @@ typedef union W128_T w128_t; #elif defined(HAVE_SSE2) /** 128-bit data structure */ union W128_T { - __m128i si; - uint32_t u[4]; + __m128i si; + uint32_t u[4]; }; /** 128-bit data type */ typedef union W128_T w128_t; @@ -89,7 +89,7 @@ typedef union W128_T w128_t; /** 128-bit data structure */ struct W128_T { - uint32_t u[4]; + uint32_t u[4]; }; /** 128-bit data type */ typedef struct W128_T w128_t; @@ -97,13 +97,13 @@ typedef struct W128_T w128_t; #endif struct sfmt_s { - /** the 128-bit internal state array */ - w128_t sfmt[N]; - /** index counter to the 32-bit internal state array */ - int idx; - /** a flag: it is 0 if and only if the internal state is not yet + /** the 128-bit internal state array */ + w128_t sfmt[N]; + /** index counter to the 32-bit internal state array */ + int idx; + /** a flag: it is 0 if and only if the internal state is not yet * initialized. 
*/ - int initialized; + int initialized; }; /*-------------------------------------- @@ -119,22 +119,22 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; ----------------*/ static inline int idxof(int i); #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -static inline void rshift128(w128_t *out, w128_t const *in, int shift); -static inline void lshift128(w128_t *out, w128_t const *in, int shift); +static inline void rshift128(w128_t *out, w128_t const *in, int shift); +static inline void lshift128(w128_t *out, w128_t const *in, int shift); #endif -static inline void gen_rand_all(sfmt_t *ctx); -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); +static inline void gen_rand_all(sfmt_t *ctx); +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); static inline uint32_t func1(uint32_t x); static inline uint32_t func2(uint32_t x); -static void period_certification(sfmt_t *ctx); +static void period_certification(sfmt_t *ctx); #if defined(BIG_ENDIAN64) && !defined(ONLY64) static inline void swap(w128_t *array, int size); #endif #if defined(HAVE_ALTIVEC) - #include "test/SFMT-alti.h" +# include "test/SFMT-alti.h" #elif defined(HAVE_SSE2) - #include "test/SFMT-sse2.h" +# include "test/SFMT-sse2.h" #endif /** @@ -142,12 +142,14 @@ static inline void swap(w128_t *array, int size); * in BIG ENDIAN machine. 
*/ #ifdef ONLY64 -static inline int idxof(int i) { - return i ^ 1; +static inline int +idxof(int i) { + return i ^ 1; } #else -static inline int idxof(int i) { - return i; +static inline int +idxof(int i) { + return i; } #endif /** @@ -159,37 +161,39 @@ static inline int idxof(int i) { * @param shift the shift value */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -#ifdef ONLY64 -static inline void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# ifdef ONLY64 +static inline void +rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; } -#else -static inline void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# else +static inline void +rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - out->u[2] = (uint32_t)oh; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 
- shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; } -#endif +# endif /** * This function simulates SIMD 128-bit left shift by the standard C. * The 128-bit integer given in in is shifted by (shift * 8) bits. @@ -198,37 +202,39 @@ static inline void rshift128(w128_t *out, w128_t const *in, int shift) { * @param in the 128-bit data to be shifted * @param shift the shift value */ -#ifdef ONLY64 -static inline void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# ifdef ONLY64 +static inline void +lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; } -#else -static inline void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# else +static inline void +lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - 
out->u[2] = (uint32_t)oh; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; } -#endif +# endif #endif /** @@ -240,41 +246,41 @@ static inline void lshift128(w128_t *out, w128_t const *in, int shift) { * @param d a 128-bit part of the internal state array */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -#ifdef ONLY64 -static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; +# ifdef ONLY64 +static inline void +do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { + w128_t x; + w128_t y; - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] - ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] - ^ (d->u[3] << SL1); + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] + ^ (d->u[3] << SL1); } -#else -static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; +# else +static inline void +do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { + w128_t x; + w128_t y; - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] - ^ 
(d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] - ^ (d->u[3] << SL1); + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] + ^ (d->u[3] << SL1); } -#endif +# endif #endif #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) @@ -282,24 +288,25 @@ static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, * This function fills the internal state array with pseudorandom * integers. */ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - w128_t *r1, *r2; +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + w128_t *r1, *r2; - r1 = &ctx->sfmt[N - 2]; - r2 = &ctx->sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, - r2); - r1 = r2; - r2 = &ctx->sfmt[i]; - } - for (; i < N; i++) { - do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1 - N], r1, - r2); - r1 = r2; - r2 = &ctx->sfmt[i]; - } + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion( + &ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } + for (; i < N; i++) { + do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], + &ctx->sfmt[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } } /** @@ -309,52 +316,58 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - w128_t *r1, *r2; +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + w128_t *r1, *r2; - r1 = &ctx->sfmt[N - 2]; - r2 = &ctx->sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (; i < N; i++) { - do_recursion(&array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (; i < size - N; i++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (j = 0; j < 2 * N - size; j++) { - ctx->sfmt[j] = array[j + size - N]; - } - for (; i < size; i++, j++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - ctx->sfmt[j] = array[i]; - } + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion( + &array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < N; i++) { + do_recursion( + &array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < size - N; i++) { + do_recursion( + &array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j] = array[j + size - N]; + } + for (; i < size; i++, j++) { + do_recursion( + &array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + ctx->sfmt[j] = array[i]; + } } #endif #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) -static inline void swap(w128_t *array, int size) { - int i; - uint32_t x, y; +static inline void +swap(w128_t *array, int size) { + int i; + uint32_t x, y; - for (i = 0; i < size; i++) { - x = array[i].u[0]; - y = array[i].u[2]; - array[i].u[0] = array[i].u[1]; - array[i].u[2] = 
array[i].u[3]; - array[i].u[1] = x; - array[i].u[3] = y; - } + for (i = 0; i < size; i++) { + x = array[i].u[0]; + y = array[i].u[2]; + array[i].u[0] = array[i].u[1]; + array[i].u[2] = array[i].u[3]; + array[i].u[1] = x; + array[i].u[3] = y; + } } #endif /** @@ -363,8 +376,9 @@ static inline void swap(w128_t *array, int size) { * @param x 32-bit integer * @return 32-bit integer */ -static uint32_t func1(uint32_t x) { - return (x ^ (x >> 27)) * (uint32_t)1664525UL; +static uint32_t +func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; } /** @@ -373,39 +387,41 @@ static uint32_t func1(uint32_t x) { * @param x 32-bit integer * @return 32-bit integer */ -static uint32_t func2(uint32_t x) { - return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +static uint32_t +func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; } /** * This function certificate the period of 2^{MEXP} */ -static void period_certification(sfmt_t *ctx) { - int inner = 0; - int i, j; - uint32_t work; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; +static void +period_certification(sfmt_t *ctx) { + int inner = 0; + int i, j; + uint32_t work; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; - for (i = 0; i < 4; i++) - inner ^= psfmt32[idxof(i)] & parity[i]; - for (i = 16; i > 0; i >>= 1) - inner ^= inner >> i; - inner &= 1; - /* check OK */ - if (inner == 1) { - return; - } - /* check NG, and modification */ - for (i = 0; i < 4; i++) { - work = 1; - for (j = 0; j < 32; j++) { - if ((work & parity[i]) != 0) { - psfmt32[idxof(i)] ^= work; + for (i = 0; i < 4; i++) + inner ^= psfmt32[idxof(i)] & parity[i]; + for (i = 16; i > 0; i >>= 1) + inner ^= inner >> i; + inner &= 1; + /* check OK */ + if (inner == 1) { return; - } - work = work << 1; } - } + /* check NG, and modification */ + for (i = 0; i < 4; i++) { + work = 1; + for (j = 0; j < 32; j++) { + if ((work & parity[i]) != 0) { + psfmt32[idxof(i)] ^= work; + return; + } + work = work << 1; + } + } } /*---------------- @@ -416,8 +432,9 @@ 
static void period_certification(sfmt_t *ctx) { * The string shows the word size, the Mersenne exponent, * and all parameters of this generator. */ -const char *get_idstring(void) { - return IDSTR; +const char * +get_idstring(void) { + return IDSTR; } /** @@ -425,8 +442,9 @@ const char *get_idstring(void) { * fill_array32() function. * @return minimum size of array used for fill_array32() function. */ -int get_min_array_size32(void) { - return N32; +int +get_min_array_size32(void) { + return N32; } /** @@ -434,8 +452,9 @@ int get_min_array_size32(void) { * fill_array64() function. * @return minimum size of array used for fill_array64() function. */ -int get_min_array_size64(void) { - return N64; +int +get_min_array_size64(void) { + return N64; } #ifndef ONLY64 @@ -444,32 +463,34 @@ int get_min_array_size64(void) { * init_gen_rand or init_by_array must be called before this function. * @return 32-bit pseudorandom number */ -uint32_t gen_rand32(sfmt_t *ctx) { - uint32_t r; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; +uint32_t +gen_rand32(sfmt_t *ctx) { + uint32_t r; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; - assert(ctx->initialized); - if (ctx->idx >= N32) { - gen_rand_all(ctx); - ctx->idx = 0; - } - r = psfmt32[ctx->idx++]; - return r; + assert(ctx->initialized); + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } + r = psfmt32[ctx->idx++]; + return r; } /* Generate a random integer in [0..limit). 
*/ -uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { - uint32_t ret, above; +uint32_t +gen_rand32_range(sfmt_t *ctx, uint32_t limit) { + uint32_t ret, above; - above = 0xffffffffU - (0xffffffffU % limit); - while (1) { - ret = gen_rand32(ctx); - if (ret < above) { - ret %= limit; - break; + above = 0xffffffffU - (0xffffffffU % limit); + while (1) { + ret = gen_rand32(ctx); + if (ret < above) { + ret %= limit; + break; + } } - } - return ret; + return ret; } #endif /** @@ -479,47 +500,49 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { * unless an initialization is again executed. * @return 64-bit pseudorandom number */ -uint64_t gen_rand64(sfmt_t *ctx) { +uint64_t +gen_rand64(sfmt_t *ctx) { #if defined(BIG_ENDIAN64) && !defined(ONLY64) - uint32_t r1, r2; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; + uint32_t r1, r2; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; #else - uint64_t r; - uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; + uint64_t r; + uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; #endif - assert(ctx->initialized); - assert(ctx->idx % 2 == 0); + assert(ctx->initialized); + assert(ctx->idx % 2 == 0); - if (ctx->idx >= N32) { - gen_rand_all(ctx); - ctx->idx = 0; - } + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } #if defined(BIG_ENDIAN64) && !defined(ONLY64) - r1 = psfmt32[ctx->idx]; - r2 = psfmt32[ctx->idx + 1]; - ctx->idx += 2; - return ((uint64_t)r2 << 32) | r1; + r1 = psfmt32[ctx->idx]; + r2 = psfmt32[ctx->idx + 1]; + ctx->idx += 2; + return ((uint64_t)r2 << 32) | r1; #else - r = psfmt64[ctx->idx / 2]; - ctx->idx += 2; - return r; + r = psfmt64[ctx->idx / 2]; + ctx->idx += 2; + return r; #endif } /* Generate a random integer in [0..limit). 
*/ -uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { - uint64_t ret, above; +uint64_t +gen_rand64_range(sfmt_t *ctx, uint64_t limit) { + uint64_t ret, above; - above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); - while (1) { - ret = gen_rand64(ctx); - if (ret < above) { - ret %= limit; - break; + above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); + while (1) { + ret = gen_rand64(ctx); + if (ret < above) { + ret %= limit; + break; + } } - } - return ret; + return ret; } #ifndef ONLY64 @@ -548,14 +571,15 @@ uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { - assert(ctx->initialized); - assert(ctx->idx == N32); - assert(size % 4 == 0); - assert(size >= N32); +void +fill_array32(sfmt_t *ctx, uint32_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 4 == 0); + assert(size >= N32); - gen_rand_array(ctx, (w128_t *)array, size / 4); - ctx->idx = N32; + gen_rand_array(ctx, (w128_t *)array, size / 4); + ctx->idx = N32; } #endif @@ -584,17 +608,18 @@ void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. 
*/ -void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { - assert(ctx->initialized); - assert(ctx->idx == N32); - assert(size % 2 == 0); - assert(size >= N64); +void +fill_array64(sfmt_t *ctx, uint64_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 2 == 0); + assert(size >= N64); - gen_rand_array(ctx, (w128_t *)array, size / 2); - ctx->idx = N32; + gen_rand_array(ctx, (w128_t *)array, size / 2); + ctx->idx = N32; #if defined(BIG_ENDIAN64) && !defined(ONLY64) - swap((w128_t *)array, size /2); + swap((w128_t *)array, size / 2); #endif } @@ -604,29 +629,31 @@ void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { * * @param seed a 32-bit integer used as the seed. */ -sfmt_t *init_gen_rand(uint32_t seed) { - void *p; - sfmt_t *ctx; - int i; - uint32_t *psfmt32; +sfmt_t * +init_gen_rand(uint32_t seed) { + void *p; + sfmt_t *ctx; + int i; + uint32_t *psfmt32; - if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { - return NULL; - } - ctx = (sfmt_t *)p; - psfmt32 = &ctx->sfmt[0].u[0]; + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + ctx = (sfmt_t *)p; + psfmt32 = &ctx->sfmt[0].u[0]; - psfmt32[idxof(0)] = seed; - for (i = 1; i < N32; i++) { - psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] - ^ (psfmt32[idxof(i - 1)] >> 30)) - + i; - } - ctx->idx = N32; - period_certification(ctx); - ctx->initialized = 1; + psfmt32[idxof(0)] = seed; + for (i = 1; i < N32; i++) { + psfmt32[idxof(i)] = 1812433253UL + * (psfmt32[idxof(i - 1)] + ^ (psfmt32[idxof(i - 1)] >> 30)) + + i; + } + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; - return ctx; + return ctx; } /** @@ -635,85 +662,87 @@ sfmt_t *init_gen_rand(uint32_t seed) { * @param init_key the array of 32-bit integers, used as a seed. * @param key_length the length of init_key. 
*/ -sfmt_t *init_by_array(uint32_t *init_key, int key_length) { - void *p; - sfmt_t *ctx; - int i, j, count; - uint32_t r; - int lag; - int mid; - int size = N * 4; - uint32_t *psfmt32; +sfmt_t * +init_by_array(uint32_t *init_key, int key_length) { + void *p; + sfmt_t *ctx; + int i, j, count; + uint32_t r; + int lag; + int mid; + int size = N * 4; + uint32_t *psfmt32; - if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { - return NULL; - } - ctx = (sfmt_t *)p; - psfmt32 = &ctx->sfmt[0].u[0]; + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + ctx = (sfmt_t *)p; + psfmt32 = &ctx->sfmt[0].u[0]; - if (size >= 623) { - lag = 11; - } else if (size >= 68) { - lag = 7; - } else if (size >= 39) { - lag = 5; - } else { - lag = 3; - } - mid = (size - lag) / 2; + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; - memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); - if (key_length + 1 > N32) { - count = key_length + 1; - } else { - count = N32; - } - r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] - ^ psfmt32[idxof(N32 - 1)]); - psfmt32[idxof(mid)] += r; - r += key_length; - psfmt32[idxof(mid + lag)] += r; - psfmt32[idxof(0)] = r; + memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); + if (key_length + 1 > N32) { + count = key_length + 1; + } else { + count = N32; + } + r = func1( + psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] ^ psfmt32[idxof(N32 - 1)]); + psfmt32[idxof(mid)] += r; + r += key_length; + psfmt32[idxof(mid + lag)] += r; + psfmt32[idxof(0)] = r; - count--; - for (i = 1, j = 0; (j < count) && (j < key_length); j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += init_key[j] + i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } - for (; j < count; j++) { - r = 
func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } - for (j = 0; j < N32; j++) { - r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] - + psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] ^= r; - r -= i; - psfmt32[idxof((i + mid + lag) % N32)] ^= r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (; j < count; j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (j = 0; j < N32; j++) { + r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] + + psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % N32)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } - ctx->idx = N32; - period_certification(ctx); - ctx->initialized = 1; + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; - return ctx; + return ctx; } -void fini_gen_rand(sfmt_t *ctx) { - assert(ctx != NULL); +void +fini_gen_rand(sfmt_t *ctx) { + assert(ctx != NULL); - ctx->initialized = 0; - free(ctx); + ctx->initialized = 0; + free(ctx); } diff --git a/test/src/mtx.c b/test/src/mtx.c index 6cb3ecd5..05c922bf 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,14 +1,14 @@ #include "test/jemalloc_test.h" #if defined(_WIN32) && 
!defined(_CRT_SPINCOUNT) -#define _CRT_SPINCOUNT 4000 +# define _CRT_SPINCOUNT 4000 #endif bool mtx_init(mtx_t *mtx) { #ifdef _WIN32 - if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, - _CRT_SPINCOUNT)) { + if (!InitializeCriticalSectionAndSpinCount( + &mtx->lock, _CRT_SPINCOUNT)) { return true; } #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) diff --git a/test/src/sleep.c b/test/src/sleep.c index 2234b4bc..96b9b7bf 100644 --- a/test/src/sleep.c +++ b/test/src/sleep.c @@ -6,7 +6,7 @@ */ void sleep_ns(unsigned ns) { - assert(ns <= 1000*1000*1000); + assert(ns <= 1000 * 1000 * 1000); #ifdef _WIN32 Sleep(ns / 1000 / 1000); @@ -14,7 +14,7 @@ sleep_ns(unsigned ns) { { struct timespec timeout; - if (ns < 1000*1000*1000) { + if (ns < 1000 * 1000 * 1000) { timeout.tv_sec = 0; timeout.tv_nsec = ns; } else { diff --git a/test/src/test.c b/test/src/test.c index a21356d5..6eb84338 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -2,10 +2,10 @@ /* Test status state. */ -static unsigned test_count = 0; -static test_status_t test_counts[test_status_count] = {0, 0, 0}; -static test_status_t test_status = test_status_pass; -static const char * test_name = ""; +static unsigned test_count = 0; +static test_status_t test_counts[test_status_count] = {0, 0, 0}; +static test_status_t test_status = test_status_pass; +static const char *test_name = ""; /* Reentrancy testing helpers. */ @@ -89,10 +89,14 @@ test_fail(const char *format, ...) 
{ static const char * test_status_string(test_status_t current_status) { switch (current_status) { - case test_status_pass: return "pass"; - case test_status_skip: return "skip"; - case test_status_fail: return "fail"; - default: not_reached(); + case test_status_pass: + return "pass"; + case test_status_skip: + return "skip"; + case test_status_fail: + return "fail"; + default: + not_reached(); } } @@ -173,19 +177,16 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { } } - bool colored = test_counts[test_status_fail] != 0 && - isatty(STDERR_FILENO); + bool colored = test_counts[test_status_fail] != 0 + && isatty(STDERR_FILENO); const char *color_start = colored ? "\033[1;31m" : ""; const char *color_end = colored ? "\033[0m" : ""; malloc_printf("%s--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n%s", - color_start, - test_status_string(test_status_pass), + color_start, test_status_string(test_status_pass), test_counts[test_status_pass], test_count, - test_status_string(test_status_skip), - test_counts[test_status_skip], test_count, - test_status_string(test_status_fail), - test_counts[test_status_fail], test_count, - color_end); + test_status_string(test_status_skip), test_counts[test_status_skip], + test_count, test_status_string(test_status_fail), + test_counts[test_status_fail], test_count, color_end); return ret; } @@ -193,7 +194,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { test_status_t p_test(test_t *t, ...) { test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -206,7 +207,7 @@ p_test(test_t *t, ...) { test_status_t p_test_no_reentrancy(test_t *t, ...) { test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -219,7 +220,7 @@ p_test_no_reentrancy(test_t *t, ...) { test_status_t p_test_no_malloc_init(test_t *t, ...) 
{ test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -235,12 +236,12 @@ p_test_no_malloc_init(test_t *t, ...) { void p_test_fail(bool may_abort, const char *prefix, const char *message) { - bool colored = test_counts[test_status_fail] != 0 && - isatty(STDERR_FILENO); + bool colored = test_counts[test_status_fail] != 0 + && isatty(STDERR_FILENO); const char *color_start = colored ? "\033[1;31m" : ""; const char *color_end = colored ? "\033[0m" : ""; - malloc_cprintf(NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, - color_end); + malloc_cprintf( + NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, color_end); test_status = test_status_fail; if (may_abort) { abort(); diff --git a/test/src/thd.c b/test/src/thd.c index 8f91a595..634dc262 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -14,7 +14,7 @@ void thd_join(thd_t thd, void **ret) { if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) { DWORD exit_code; - GetExitCodeThread(thd, (LPDWORD) &exit_code); + GetExitCodeThread(thd, (LPDWORD)&exit_code); *ret = (void *)(uintptr_t)exit_code; } } @@ -44,7 +44,8 @@ thd_setname(const char *name) { bool thd_has_setname(void) { -#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) +#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) \ + || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) return true; #else return false; diff --git a/test/src/timer.c b/test/src/timer.c index 94528a34..017bf5a5 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -25,8 +25,8 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - size_t i = 0; - size_t j, n; + size_t i = 0; + size_t j, n; /* * The time difference could be 0 if the two clock readings are @@ -36,11 +36,11 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { * Thus, bump t1 if it is 0 to avoid dividing 0. 
*/ if (t1 == 0) { - t1 = 1; + t1 = 1; } /* Whole. */ - n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); + n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64, t0 / t1); i += n; if (i >= buflen) { return; @@ -51,15 +51,17 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { } /* Decimal. */ - n = malloc_snprintf(&buf[i], buflen-i, "."); + n = malloc_snprintf(&buf[i], buflen - i, "."); i += n; /* Fraction. */ - while (i < buflen-1) { - uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10 - >= 5)) ? 1 : 0; - n = malloc_snprintf(&buf[i], buflen-i, - "%"FMTu64, (t0 * mult / t1) % 10 + round); + while (i < buflen - 1) { + uint64_t round = (i + 1 == buflen - 1 + && ((t0 * mult * 10 / t1) % 10 >= 5)) + ? 1 + : 0; + n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64, + (t0 * mult / t1) % 10 + round); i += n; mult *= 10; } diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 6b973bb1..46ed0bf7 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -10,9 +10,9 @@ static size_t miblen = MIBLEN; #define HUGE_BATCH (1000 * 1000) #define HUGE_BATCH_ITER 100 #define LEN (100 * 1000 * 1000) -static void *batch_ptrs[LEN]; +static void *batch_ptrs[LEN]; static size_t batch_ptrs_next = 0; -static void *item_ptrs[LEN]; +static void *item_ptrs[LEN]; static size_t item_ptrs_next = 0; #define SIZE 7 @@ -22,17 +22,18 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static void batch_alloc_wrapper(size_t batch) { - batch_alloc_packet_t batch_alloc_packet = - {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; + batch_alloc_packet_t batch_alloc_packet = { + batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); assert_d_eq(mallctlbymib(mib, miblen, &filled, &len, - &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + &batch_alloc_packet, sizeof(batch_alloc_packet)), + 0, ""); assert_zu_eq(filled, batch, ""); } @@ -94,9 
+95,9 @@ compare_without_free(size_t batch, size_t iter, batch_ptrs_next = 0; release_and_clear(item_ptrs, item_ptrs_next); item_ptrs_next = 0; - compare_funcs(0, iter, - "batch allocation", batch_alloc_without_free_func, - "item allocation", item_alloc_without_free_func); + compare_funcs(0, iter, "batch allocation", + batch_alloc_without_free_func, "item allocation", + item_alloc_without_free_func); release_and_clear(batch_ptrs, batch_ptrs_next); batch_ptrs_next = 0; release_and_clear(item_ptrs, item_ptrs_next); @@ -116,8 +117,7 @@ compare_with_free(size_t batch, size_t iter, } batch_ptrs_next = 0; item_ptrs_next = 0; - compare_funcs(0, iter, - "batch allocation", batch_alloc_with_free_func, + compare_funcs(0, iter, "batch allocation", batch_alloc_with_free_func, "item allocation", item_alloc_with_free_func); batch_ptrs_next = 0; item_ptrs_next = 0; @@ -187,12 +187,11 @@ TEST_BEGIN(test_huge_batch_with_free) { } TEST_END -int main(void) { - assert_d_eq(mallctlnametomib("experimental.batch_alloc", mib, &miblen), - 0, ""); - return test_no_reentrancy( - test_tiny_batch_without_free, - test_tiny_batch_with_free, - test_huge_batch_without_free, +int +main(void) { + assert_d_eq( + mallctlnametomib("experimental.batch_alloc", mib, &miblen), 0, ""); + return test_no_reentrancy(test_tiny_batch_without_free, + test_tiny_batch_with_free, test_huge_batch_without_free, test_huge_batch_with_free); } diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 7422d1ca..0c4697a6 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,7 +3,7 @@ static void malloc_free(void) { - void* p = malloc(1); + void *p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); free((void *)p); @@ -11,7 +11,7 @@ malloc_free(void) { static void new_delete(void) { - void* p = ::operator new(1); + void *p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); p = no_opt_ptr(p); 
::operator delete((void *)p); @@ -19,7 +19,7 @@ new_delete(void) { static void malloc_free_array(void) { - void* p = malloc(sizeof(int)*8); + void *p = malloc(sizeof(int) * 8); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); free((void *)p); @@ -27,7 +27,7 @@ malloc_free_array(void) { static void new_delete_array(void) { - int* p = new int[8]; + int *p = new int[8]; expect_ptr_not_null((void *)p, "Unexpected new[] failure"); p = (int *)no_opt_ptr((void *)p); delete[] (int *)p; @@ -36,7 +36,7 @@ new_delete_array(void) { #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - void* p = ::operator new(1); + void *p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); p = no_opt_ptr(p); ::operator delete((void *)p, 1); @@ -44,45 +44,41 @@ new_sized_delete(void) { static void malloc_sdallocx(void) { - void* p = malloc(1); + void *p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); - sdallocx((void *)p, 1, 0); + sdallocx((void *)p, 1, 0); } #endif TEST_BEGIN(test_free_vs_delete) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free", (void *)malloc_free, - "new_delete", (void *)new_delete); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free", + (void *)malloc_free, "new_delete", (void *)new_delete); } TEST_END TEST_BEGIN(test_free_array_vs_delete_array) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free_array", (void *)malloc_free_array, - "delete_array", (void *)new_delete_array); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free_array", + (void *)malloc_free_array, "delete_array", + (void *)new_delete_array); } TEST_END - TEST_BEGIN(test_sized_delete_vs_sdallocx) { #if __cpp_sized_deallocation >= 201309 - compare_funcs(10*1000*1000, 100*1000*1000, - "new_size_delete", (void *)new_sized_delete, - "malloc_sdallocx", (void *)malloc_sdallocx); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, 
"new_size_delete", + (void *)new_sized_delete, "malloc_sdallocx", + (void *)malloc_sdallocx); #else - malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ + malloc_printf( + "Skipping test_sized_delete_vs_sdallocx since \ sized deallocation is not enabled.\n"); #endif } TEST_END - int main() { - return test_no_reentrancy( - test_free_vs_delete, - test_free_array_vs_delete_array, - test_sized_delete_vs_sdallocx); + return test_no_reentrancy(test_free_vs_delete, + test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx); } diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index 546bcc0b..c7b13404 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -35,9 +35,9 @@ item_alloc_dalloc_small(void) { } TEST_BEGIN(test_array_vs_item_small) { - compare_funcs(1 * 1000, 10 * 1000, - "array of small allocations", array_alloc_dalloc_small, - "small item allocation", item_alloc_dalloc_small); + compare_funcs(1 * 1000, 10 * 1000, "array of small allocations", + array_alloc_dalloc_small, "small item allocation", + item_alloc_dalloc_small); } TEST_END @@ -64,14 +64,14 @@ item_alloc_dalloc_large(void) { } TEST_BEGIN(test_array_vs_item_large) { - compare_funcs(100, 1000, - "array of large allocations", array_alloc_dalloc_large, - "large item allocation", item_alloc_dalloc_large); + compare_funcs(100, 1000, "array of large allocations", + array_alloc_dalloc_large, "large item allocation", + item_alloc_dalloc_large); } TEST_END -int main(void) { +int +main(void) { return test_no_reentrancy( - test_array_vs_item_small, - test_array_vs_item_large); + test_array_vs_item_small, test_array_vs_item_large); } diff --git a/test/stress/hookbench.c b/test/stress/hookbench.c index 97e90b0e..455e4c56 100644 --- a/test/stress/hookbench.c +++ b/test/stress/hookbench.c @@ -2,19 +2,16 @@ static void noop_alloc_hook(void *extra, hook_alloc_t type, void *result, - uintptr_t result_raw, uintptr_t args_raw[3]) { -} + uintptr_t result_raw, uintptr_t 
args_raw[3]) {} static void -noop_dalloc_hook(void *extra, hook_dalloc_t type, void *address, - uintptr_t args_raw[3]) { -} +noop_dalloc_hook( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {} static void noop_expand_hook(void *extra, hook_expand_t type, void *address, size_t old_usize, size_t new_usize, uintptr_t result_raw, - uintptr_t args_raw[4]) { -} + uintptr_t args_raw[4]) {} static void malloc_free_loop(int iters) { @@ -26,23 +23,23 @@ malloc_free_loop(int iters) { static void test_hooked(int iters) { - hooks_t hooks = {&noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, - NULL}; + hooks_t hooks = { + &noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, NULL}; - int err; - void *handles[HOOK_MAX]; + int err; + void *handles[HOOK_MAX]; size_t sz = sizeof(handles[0]); for (int i = 0; i < HOOK_MAX; i++) { - err = mallctl("experimental.hooks.install", &handles[i], - &sz, &hooks, sizeof(hooks)); + err = mallctl("experimental.hooks.install", &handles[i], &sz, + &hooks, sizeof(hooks)); assert(err == 0); timedelta_t timer; timer_start(&timer); malloc_free_loop(iters); timer_stop(&timer); - malloc_printf("With %d hook%s: %"FMTu64"us\n", i + 1, + malloc_printf("With %d hook%s: %" FMTu64 "us\n", i + 1, i + 1 == 1 ? 
"" : "s", timer_usec(&timer)); } for (int i = 0; i < HOOK_MAX; i++) { @@ -59,7 +56,7 @@ test_unhooked(int iters) { malloc_free_loop(iters); timer_stop(&timer); - malloc_printf("Without hooks: %"FMTu64"us\n", timer_usec(&timer)); + malloc_printf("Without hooks: %" FMTu64 "us\n", timer_usec(&timer)); } int diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c index 44a60c53..785ed836 100644 --- a/test/stress/large_microbench.c +++ b/test/stress/large_microbench.c @@ -22,14 +22,12 @@ small_mallocx_free(void) { } TEST_BEGIN(test_large_vs_small) { - compare_funcs(100*1000, 1*1000*1000, "large mallocx", + compare_funcs(100 * 1000, 1 * 1000 * 1000, "large mallocx", large_mallocx_free, "small mallocx", small_mallocx_free); } TEST_END int main(void) { - return test_no_reentrancy( - test_large_vs_small); + return test_no_reentrancy(test_large_vs_small); } - diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c index d29b3118..b4c0f560 100644 --- a/test/stress/mallctl.c +++ b/test/stress/mallctl.c @@ -4,8 +4,8 @@ static void mallctl_short(void) { const char *version; - size_t sz = sizeof(version); - int err = mallctl("version", &version, &sz, NULL, 0); + size_t sz = sizeof(version); + int err = mallctl("version", &version, &sz, NULL, 0); assert_d_eq(err, 0, "mallctl failure"); } @@ -13,19 +13,19 @@ size_t mib_short[1]; static void mallctlbymib_short(void) { - size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + size_t miblen = sizeof(mib_short) / sizeof(mib_short[0]); const char *version; - size_t sz = sizeof(version); + size_t sz = sizeof(version); int err = mallctlbymib(mib_short, miblen, &version, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } TEST_BEGIN(test_mallctl_vs_mallctlbymib_short) { - size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + size_t miblen = sizeof(mib_short) / sizeof(mib_short[0]); int err = mallctlnametomib("version", mib_short, &miblen); assert_d_eq(err, 0, "mallctlnametomib failure"); - 
compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_short", + compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_short", mallctl_short, "mallctlbymib_short", mallctlbymib_short); } TEST_END @@ -33,9 +33,9 @@ TEST_END static void mallctl_long(void) { uint64_t nmalloc; - size_t sz = sizeof(nmalloc); - int err = mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, - 0); + size_t sz = sizeof(nmalloc); + int err = mallctl( + "stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctl failure"); } @@ -43,10 +43,10 @@ size_t mib_long[6]; static void mallctlbymib_long(void) { - size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + size_t miblen = sizeof(mib_long) / sizeof(mib_long[0]); uint64_t nmalloc; - size_t sz = sizeof(nmalloc); - int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); + size_t sz = sizeof(nmalloc); + int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } @@ -57,18 +57,17 @@ TEST_BEGIN(test_mallctl_vs_mallctlbymib_long) { */ test_skip_if(!config_stats); - size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); - int err = mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib_long, - &miblen); + size_t miblen = sizeof(mib_long) / sizeof(mib_long[0]); + int err = mallctlnametomib( + "stats.arenas.0.bins.0.nmalloc", mib_long, &miblen); assert_d_eq(err, 0, "mallctlnametomib failure"); - compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_long", + compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_long", mallctl_long, "mallctlbymib_long", mallctlbymib_long); } TEST_END int main(void) { - return test_no_reentrancy( - test_mallctl_vs_mallctlbymib_short, + return test_no_reentrancy(test_mallctl_vs_mallctlbymib_short, test_mallctl_vs_mallctlbymib_long); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 89479b7e..3d261a92 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -25,7 +25,7 @@ 
mallocx_free(void) { } TEST_BEGIN(test_malloc_vs_mallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc", malloc_free, "mallocx", mallocx_free); } TEST_END @@ -53,14 +53,14 @@ malloc_sdallocx(void) { } TEST_BEGIN(test_free_vs_dallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free, + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "free", malloc_free, "dallocx", malloc_dallocx); } TEST_END TEST_BEGIN(test_dallocx_vs_sdallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx, - "sdallocx", malloc_sdallocx); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "dallocx", + malloc_dallocx, "sdallocx", malloc_sdallocx); } TEST_END @@ -94,7 +94,7 @@ malloc_sallocx_free(void) { } TEST_BEGIN(test_mus_vs_sallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_usable_size", malloc_mus_free, "sallocx", malloc_sallocx_free); } TEST_END @@ -116,17 +116,14 @@ malloc_nallocx_free(void) { } TEST_BEGIN(test_sallocx_vs_nallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "sallocx", malloc_sallocx_free, "nallocx", malloc_nallocx_free); } TEST_END int main(void) { - return test_no_reentrancy( - test_malloc_vs_mallocx, - test_free_vs_dallocx, - test_dallocx_vs_sdallocx, - test_mus_vs_sallocx, + return test_no_reentrancy(test_malloc_vs_mallocx, test_free_vs_dallocx, + test_dallocx_vs_sdallocx, test_mus_vs_sallocx, test_sallocx_vs_nallocx); } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index b9f85dd9..8dbb61ed 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -40,1424 +40,1343 @@ #define COUNT_1 1000 #define COUNT_2 700 -static const uint32_t init_gen_rand_32_expected[] = { - 3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U, - 3796268453U, 423124208U, 2143818589U, 3827219408U, 2987036003U, - 
2674978610U, 1536842514U, 2027035537U, 2534897563U, 1686527725U, - 545368292U, 1489013321U, 1370534252U, 4231012796U, 3994803019U, - 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U, - 359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, - 3206507879U, 2378925033U, 1040214787U, 2524778605U, 3088428700U, - 1417665896U, 964324147U, 2282797708U, 2456269299U, 313400376U, - 2245093271U, 1015729427U, 2694465011U, 3246975184U, 1992793635U, - 463679346U, 3721104591U, 3475064196U, 856141236U, 1499559719U, - 3522818941U, 3721533109U, 1954826617U, 1282044024U, 1543279136U, - 1301863085U, 2669145051U, 4221477354U, 3896016841U, 3392740262U, - 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U, - 1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, - 157593879U, 1136901695U, 4038377686U, 3572517236U, 4231706728U, - 2997311961U, 1189931652U, 3981543765U, 2826166703U, 87159245U, - 1721379072U, 3897926942U, 1790395498U, 2569178939U, 1047368729U, - 2340259131U, 3144212906U, 2301169789U, 2442885464U, 3034046771U, - 3667880593U, 3935928400U, 2372805237U, 1666397115U, 2460584504U, - 513866770U, 3810869743U, 2147400037U, 2792078025U, 2941761810U, - 3212265810U, 984692259U, 346590253U, 1804179199U, 3298543443U, - 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U, - 2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U, - 2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, - 1202933193U, 4087643167U, 2590021067U, 2256240196U, 1746697293U, - 1013913783U, 1155864921U, 2715773730U, 915061862U, 1948766573U, - 2322882854U, 3761119102U, 1343405684U, 3078711943U, 3067431651U, - 3245156316U, 3588354584U, 3484623306U, 3899621563U, 4156689741U, - 3237090058U, 3880063844U, 862416318U, 4039923869U, 2303788317U, - 3073590536U, 701653667U, 2131530884U, 3169309950U, 2028486980U, - 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U, - 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, - 1104864179U, 
342430307U, 1350510923U, 3024656237U, 1028417492U, - 2870772950U, 290847558U, 3675663500U, 508431529U, 4264340390U, - 2263569913U, 1669302976U, 519511383U, 2706411211U, 3764615828U, - 3883162495U, 4051445305U, 2412729798U, 3299405164U, 3991911166U, - 2348767304U, 2664054906U, 3763609282U, 593943581U, 3757090046U, - 2075338894U, 2020550814U, 4287452920U, 4290140003U, 1422957317U, - 2512716667U, 2003485045U, 2307520103U, 2288472169U, 3940751663U, - 4204638664U, 2892583423U, 1710068300U, 3904755993U, 2363243951U, - 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U, - 2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, - 3083335297U, 26885281U, 3932155283U, 1531751116U, 1425227133U, - 495654159U, 3279634176U, 3855562207U, 3957195338U, 4159985527U, - 893375062U, 1875515536U, 1327247422U, 3754140693U, 1028923197U, - 1729880440U, 805571298U, 448971099U, 2726757106U, 2749436461U, - 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U, - 943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, - 3893463573U, 3977583081U, 2636504348U, 1110673525U, 3548479841U, - 4258854744U, 980047703U, 4057175418U, 3890008292U, 145653646U, - 3141868989U, 3293216228U, 1194331837U, 1254570642U, 3049934521U, - 2868313360U, 2886032750U, 1110873820U, 279553524U, 3007258565U, - 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U, - 3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, - 444526410U, 779157624U, 1088229627U, 1092460223U, 1856013765U, - 3659877367U, 368270451U, 503570716U, 3000984671U, 2742789647U, - 928097709U, 2914109539U, 308843566U, 2816161253U, 3667192079U, - 2762679057U, 3395240989U, 2928925038U, 1491465914U, 3458702834U, - 3787782576U, 2894104823U, 1296880455U, 1253636503U, 989959407U, - 2291560361U, 2776790436U, 1913178042U, 1584677829U, 689637520U, - 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U, - 2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, - 1080720688U, 3938032556U, 
387896427U, 2650839632U, 99042991U, - 1720913794U, 1047186003U, 1877048040U, 2090457659U, 517087501U, - 4172014665U, 2129713163U, 2413533132U, 2760285054U, 4129272496U, - 1317737175U, 2309566414U, 2228873332U, 3889671280U, 1110864630U, - 3576797776U, 2074552772U, 832002644U, 3097122623U, 2464859298U, - 2679603822U, 1667489885U, 3237652716U, 1478413938U, 1719340335U, - 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U, - 1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, - 841660320U, 3974501451U, 3360949056U, 1676829340U, 728899254U, - 2047809627U, 2390948962U, 670165943U, 3412951831U, 4189320049U, - 1911595255U, 2055363086U, 507170575U, 418219594U, 4141495280U, - 2692088692U, 4203630654U, 3540093932U, 791986533U, 2237921051U, - 2526864324U, 2956616642U, 1394958700U, 1983768223U, 1893373266U, - 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U, - 2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, - 1123578029U, 1804276347U, 997971319U, 4203797076U, 4185199713U, - 2811733626U, 2343642194U, 2985262313U, 1417930827U, 3759587724U, - 1967077982U, 1585223204U, 1097475516U, 1903944948U, 740382444U, - 1114142065U, 1541796065U, 1718384172U, 1544076191U, 1134682254U, - 3519754455U, 2866243923U, 341865437U, 645498576U, 2690735853U, - 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U, - 3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U, - 3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, - 1297726519U, 219544855U, 4270285558U, 437578827U, 1444698679U, - 2258519491U, 963109892U, 3982244073U, 3351535275U, 385328496U, - 1804784013U, 698059346U, 3920535147U, 708331212U, 784338163U, - 785678147U, 1238376158U, 1557298846U, 2037809321U, 271576218U, - 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U, - 3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, - 2963500837U, 2253971215U, 3153642623U, 1066925709U, 2582781958U, - 3034720222U, 1090798544U, 2942170004U, 
4036187520U, 686972531U, - 2610990302U, 2641437026U, 1837562420U, 722096247U, 1315333033U, - 2102231203U, 3402389208U, 3403698140U, 1312402831U, 2898426558U, - 814384596U, 385649582U, 1916643285U, 1924625106U, 2512905582U, - 2501170304U, 4275223366U, 2841225246U, 1467663688U, 3563567847U, - 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U, - 3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U, - 1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U, - 1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, - 3752392312U, 1287475550U, 3770904171U, 3004244617U, 1502117784U, - 918698423U, 2419857538U, 3864502062U, 1751322107U, 2188775056U, - 4018728324U, 983712955U, 440071928U, 3710838677U, 2001027698U, - 3994702151U, 22493119U, 3584400918U, 3446253670U, 4254789085U, - 1405447860U, 1240245579U, 1800644159U, 1661363424U, 3278326132U, - 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U, - 3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, - 1678544474U, 3148435887U, 3457217359U, 1193226330U, 2816576908U, - 154025329U, 121678860U, 1164915738U, 973873761U, 269116100U, - 52087970U, 744015362U, 498556057U, 94298882U, 1563271621U, - 2383059628U, 4197367290U, 3958472990U, 2592083636U, 2906408439U, - 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U, - 4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U, - 3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, - 226777499U, 2496151295U, 2207301712U, 3283683112U, 611630281U, - 1933218215U, 3315610954U, 3889441987U, 3719454256U, 3957190521U, - 1313998161U, 2365383016U, 3146941060U, 1801206260U, 796124080U, - 2076248581U, 1747472464U, 3254365145U, 595543130U, 3573909503U, - 3758250204U, 2020768540U, 2439254210U, 93368951U, 3155792250U, - 2600232980U, 3709198295U, 3894900440U, 2971850836U, 1578909644U, - 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U, - 1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, - 
1001330373U, 3561331654U, 2259301289U, 1564977624U, 3835077093U, - 727244906U, 4255738067U, 1214133513U, 2570786021U, 3899704621U, - 1633861986U, 1636979509U, 1438500431U, 58463278U, 2823485629U, - 2297430187U, 2926781924U, 3371352948U, 1864009023U, 2722267973U, - 1444292075U, 437703973U, 1060414512U, 189705863U, 910018135U, - 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U, - 2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, - 2978462572U, 2176222820U, 829424696U, 2790788332U, 2750819108U, - 1594611657U, 3899878394U, 3032870364U, 1702887682U, 1948167778U, - 14130042U, 192292500U, 947227076U, 90719497U, 3854230320U, - 784028434U, 2142399787U, 1563449646U, 2844400217U, 819143172U, - 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U, - 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, - 1412424497U, 2981395985U, 1418359660U, 2925902456U, 52752784U, - 3713667988U, 3924669405U, 648975707U, 1145520213U, 4018650664U, - 3805915440U, 2380542088U, 2013260958U, 3262572197U, 2465078101U, - 1114540067U, 3728768081U, 2396958768U, 590672271U, 904818725U, - 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U, - 2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, - 1142661026U, 2889931380U, 14316674U, 2201179167U, 415289459U, - 448265759U, 3515142743U, 3254903683U, 246633281U, 1184307224U, - 2418347830U, 2092967314U, 2682072314U, 2558750234U, 2000352263U, - 1544150531U, 399010405U, 1513946097U, 499682937U, 461167460U, - 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U, - 635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, - 349178792U, 226482567U, 3102426060U, 3575998268U, 2103001871U, - 3243137071U, 225500688U, 1634718593U, 4283311431U, 4292122923U, - 3842802787U, 811735523U, 105712518U, 663434053U, 1855889273U, - 2847972595U, 1196355421U, 2552150115U, 4254510614U, 3752181265U, - 3430721819U, 3828705396U, 3436287905U, 3441964937U, 4123670631U, - 353001539U, 459496439U, 
3799690868U, 1293777660U, 2761079737U, - 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, - 4134660419U, 3903444024U, 3576494993U, 203682175U, 3321164857U, - 2747963611U, 79749085U, 2992890370U, 1240278549U, 1772175713U, - 2111331972U, 2655023449U, 1683896345U, 2836027212U, 3482868021U, - 2489884874U, 756853961U, 2298874501U, 4013448667U, 4143996022U, - 2948306858U, 4132920035U, 1283299272U, 995592228U, 3450508595U, - 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U, - 3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, - 2050079547U, 3198903947U, 3105589778U, 4066481316U, 3026383978U, - 2276901713U, 365637751U, 2260718426U, 1394775634U, 1791172338U, - 2690503163U, 2952737846U, 1568710462U, 732623190U, 2980358000U, - 1053631832U, 1432426951U, 3229149635U, 1854113985U, 3719733532U, - 3204031934U, 735775531U, 107468620U, 3734611984U, 631009402U, - 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U, - 1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, - 1381727755U, 405608287U, 4287919625U, 1703554290U, 3589580244U, - 2911403488U, 2166565U, 2647306451U, 2330535117U, 1200815358U, - 1165916754U, 245060911U, 4040679071U, 3684908771U, 2452834126U, - 2486872773U, 2318678365U, 2940627908U, 1837837240U, 3447897409U, - 4270484676U, 1495388728U, 3754288477U, 4204167884U, 1386977705U, - 2692224733U, 3076249689U, 4109568048U, 4170955115U, 4167531356U, - 4020189950U, 4261855038U, 3036907575U, 3410399885U, 3076395737U, - 1046178638U, 144496770U, 230725846U, 3349637149U, 17065717U, - 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U, - 3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, - 3120404710U, 254684547U, 2653661580U, 3663904795U, 2631942758U, - 1063234347U, 2609732900U, 2332080715U, 3521125233U, 1180599599U, - 1935868586U, 4110970440U, 296706371U, 2128666368U, 1319875791U, - 1570900197U, 3096025483U, 1799882517U, 1928302007U, 1163707758U, - 1244491489U, 3533770203U, 567496053U, 
2757924305U, 2781639343U, - 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U, - 2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, - 330725126U, 367400677U, 888239854U, 545570454U, 4259590525U, - 134343617U, 1102169784U, 1647463719U, 3260979784U, 1518840883U, - 3631537963U, 3342671457U, 1301549147U, 2083739356U, 146593792U, - 3217959080U, 652755743U, 2032187193U, 3898758414U, 1021358093U, - 4037409230U, 2176407931U, 3427391950U, 2883553603U, 985613827U, - 3105265092U, 3423168427U, 3387507672U, 467170288U, 2141266163U, - 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U, - 3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, - 639683232U, 2639569327U, 1218546948U, 4263586685U, 3058215773U, - 2352279820U, 401870217U, 2625822463U, 1529125296U, 2981801895U, - 1191285226U, 4027725437U, 3432700217U, 4098835661U, 971182783U, - 2443861173U, 3881457123U, 3874386651U, 457276199U, 2638294160U, - 4002809368U, 421169044U, 1112642589U, 3076213779U, 3387033971U, - 2499610950U, 3057240914U, 1662679783U, 461224431U, 1168395933U -}; -static const uint32_t init_by_array_32_expected[] = { - 2920711183U, 3885745737U, 3501893680U, 856470934U, 1421864068U, - 277361036U, 1518638004U, 2328404353U, 3355513634U, 64329189U, - 1624587673U, 3508467182U, 2481792141U, 3706480799U, 1925859037U, - 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U, - 2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U, - 4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, - 121399709U, 3170839019U, 4044347501U, 953953814U, 3821710850U, - 3085591323U, 3666535579U, 3577837737U, 2012008410U, 3565417471U, - 4044408017U, 433600965U, 1637785608U, 1798509764U, 860770589U, - 3081466273U, 3982393409U, 2451928325U, 3437124742U, 4093828739U, - 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U, - 3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, - 4078331588U, 3706103141U, 170391138U, 3806085154U, 1680970100U, - 
1961637521U, 3316029766U, 890610272U, 1453751581U, 1430283664U, - 3051057411U, 3597003186U, 542563954U, 3796490244U, 1690016688U, - 3448752238U, 440702173U, 347290497U, 1121336647U, 2540588620U, - 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U, - 659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, - 2315569495U, 2729518615U, 564745877U, 1263517638U, 3157185798U, - 1604852056U, 1011639885U, 2950579535U, 2524219188U, 312951012U, - 1528896652U, 1327861054U, 2846910138U, 3966855905U, 2536721582U, - 855353911U, 1685434729U, 3303978929U, 1624872055U, 4020329649U, - 3164802143U, 1642802700U, 1957727869U, 1792352426U, 3334618929U, - 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U, - 1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U, - 2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U, - 1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, - 2590843588U, 3207422808U, 3275066464U, 561592872U, 3957205738U, - 3396578098U, 48410678U, 3505556445U, 1005764855U, 3920606528U, - 2936980473U, 2378918600U, 2404449845U, 1649515163U, 701203563U, - 3705256349U, 83714199U, 3586854132U, 922978446U, 2863406304U, - 3523398907U, 2606864832U, 2385399361U, 3171757816U, 4262841009U, - 3645837721U, 1169579486U, 3666433897U, 3174689479U, 1457866976U, - 3803895110U, 3346639145U, 1907224409U, 1978473712U, 1036712794U, - 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U, - 1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, - 1597354672U, 3349636117U, 2357291114U, 3995796221U, 945364213U, - 1893326518U, 3770814016U, 1691552714U, 2397527410U, 967486361U, - 776416472U, 4197661421U, 951150819U, 1852770983U, 4044624181U, - 1399439738U, 4194455275U, 2284037669U, 1550734958U, 3321078108U, - 1865235926U, 2912129961U, 2664980877U, 1357572033U, 2600196436U, - 2486728200U, 2372668724U, 1567316966U, 2374111491U, 1839843570U, - 20815612U, 3727008608U, 3871996229U, 824061249U, 1932503978U, - 3404541726U, 
758428924U, 2609331364U, 1223966026U, 1299179808U, - 648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, - 1032413764U, 4185884695U, 2490396037U, 1201932817U, 4060951446U, - 4165586898U, 1629813212U, 2887821158U, 415045333U, 628926856U, - 2193466079U, 3391843445U, 2227540681U, 1907099846U, 2848448395U, - 1717828221U, 1372704537U, 1707549841U, 2294058813U, 2101214437U, - 2052479531U, 1695809164U, 3176587306U, 2632770465U, 81634404U, - 1603220563U, 644238487U, 302857763U, 897352968U, 2613146653U, - 1391730149U, 4245717312U, 4191828749U, 1948492526U, 2618174230U, - 3992984522U, 2178852787U, 3596044509U, 3445573503U, 2026614616U, - 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U, - 3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, - 3004150659U, 2135025926U, 948690501U, 2799119116U, 4228829406U, - 1981197489U, 4209064138U, 684318751U, 3459397845U, 201790843U, - 4022541136U, 3043635877U, 492509624U, 3263466772U, 1509148086U, - 921459029U, 3198857146U, 705479721U, 3835966910U, 3603356465U, - 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U, - 449571793U, 3246390646U, 3868232151U, 1479156585U, 2900125656U, - 2464815318U, 3960178104U, 1784261920U, 18311476U, 3627135050U, - 644609697U, 424968996U, 919890700U, 2986824110U, 816423214U, - 4003562844U, 1392714305U, 1757384428U, 2569030598U, 995949559U, - 3875659880U, 2933807823U, 2752536860U, 2993858466U, 4030558899U, - 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U, - 3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, - 3578063323U, 3242082049U, 1778193530U, 27981909U, 2362826515U, - 389875677U, 1043878156U, 581653903U, 3830568952U, 389535942U, - 3713523185U, 2768373359U, 2526101582U, 1998618197U, 1160859704U, - 3951172488U, 1098005003U, 906275699U, 3446228002U, 2220677963U, - 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U, - 857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, - 4288137521U, 819087232U, 596301494U, 
872823172U, 1526888217U, - 805161465U, 1116186205U, 2829002754U, 2352620120U, 620121516U, - 354159268U, 3601949785U, 209568138U, 1352371732U, 2145977349U, - 4236871834U, 1539414078U, 3558126206U, 3224857093U, 4164166682U, - 3817553440U, 3301780278U, 2682696837U, 3734994768U, 1370950260U, - 1477421202U, 2521315749U, 1330148125U, 1261554731U, 2769143688U, - 3554756293U, 4235882678U, 3254686059U, 3530579953U, 1215452615U, - 3574970923U, 4057131421U, 589224178U, 1000098193U, 171190718U, - 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U, - 3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U, - 2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U, - 1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U, - 2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, - 81165449U, 612438025U, 3912966678U, 1356929810U, 733545735U, - 537003843U, 1282953084U, 884458241U, 588930090U, 3930269801U, - 2961472450U, 1219535534U, 3632251943U, 268183903U, 1441240533U, - 3653903360U, 3854473319U, 2259087390U, 2548293048U, 2022641195U, - 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U, - 2628314765U, 2466219854U, 717546004U, 2322237039U, 416725234U, - 1544049923U, 1797944973U, 3398652364U, 3111909456U, 485742908U, - 2277491072U, 1056355088U, 3181001278U, 129695079U, 2693624550U, - 1764438564U, 3797785470U, 195503713U, 3266519725U, 2053389444U, - 1961527818U, 3400226523U, 3777903038U, 2597274307U, 4235851091U, - 4094406648U, 2171410785U, 1781151386U, 1378577117U, 654643266U, - 3424024173U, 3385813322U, 679385799U, 479380913U, 681715441U, - 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U, - 3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U, - 1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, - 3972652090U, 679881088U, 40577009U, 3705286397U, 2815423480U, - 3566262429U, 663396513U, 3777887429U, 4016670678U, 404539370U, - 1142712925U, 1140173408U, 2913248352U, 2872321286U, 263751841U, - 
3175196073U, 3162557581U, 2878996619U, 75498548U, 3836833140U, - 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U, - 1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, - 1767889440U, 4264698824U, 1582999313U, 884471997U, 2508825098U, - 3756370771U, 2457213553U, 3565776881U, 3709583214U, 915609601U, - 460833524U, 1091049576U, 85522880U, 2553251U, 132102809U, - 2429882442U, 2562084610U, 1386507633U, 4112471229U, 21965213U, - 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U, - 1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U, - 3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, - 1596096923U, 610150600U, 431464457U, 2541325046U, 486478003U, - 739704936U, 2862696430U, 3037903166U, 1129749694U, 2611481261U, - 1228993498U, 510075548U, 3424962587U, 2458689681U, 818934833U, - 4233309125U, 1608196251U, 3419476016U, 1858543939U, 2682166524U, - 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U, - 3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, - 565267881U, 768644821U, 198310105U, 2396688616U, 1837659011U, - 203429334U, 854539004U, 4235811518U, 3338304926U, 3730418692U, - 3852254981U, 3032046452U, 2329811860U, 2303590566U, 2696092212U, - 3894665932U, 145835667U, 249563655U, 1932210840U, 2431696407U, - 3312636759U, 214962629U, 2092026914U, 3020145527U, 4073039873U, - 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U, - 547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, - 3888005760U, 696099141U, 397343236U, 1864511780U, 44029739U, - 1729526891U, 1993398655U, 2010173426U, 2591546756U, 275223291U, - 1503900299U, 4217765081U, 2185635252U, 1122436015U, 3550155364U, - 681707194U, 3260479338U, 933579397U, 2983029282U, 2505504587U, - 2667410393U, 2962684490U, 4139721708U, 2658172284U, 2452602383U, - 2607631612U, 1344296217U, 3075398709U, 2949785295U, 1049956168U, - 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U, - 1918468193U, 3843983676U, 634358221U, 
2538335643U, 1873351298U, - 3368723763U, 2129144130U, 3203528633U, 3087174986U, 2691698871U, - 2516284287U, 24437745U, 1118381474U, 2816314867U, 2448576035U, - 4281989654U, 217287825U, 165872888U, 2628995722U, 3533525116U, - 2721669106U, 872340568U, 3429930655U, 3309047304U, 3916704967U, - 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U, - 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, - 3853082619U, 4073196549U, 1189620777U, 637238656U, 930241537U, - 4042750792U, 3842136042U, 2417007212U, 2524907510U, 1243036827U, - 1282059441U, 3764588774U, 1394459615U, 2323620015U, 1166152231U, - 3307479609U, 3849322257U, 3507445699U, 4247696636U, 758393720U, - 967665141U, 1095244571U, 1319812152U, 407678762U, 2640605208U, - 2170766134U, 3663594275U, 4039329364U, 2512175520U, 725523154U, - 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U, - 1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, - 226175489U, 2961367263U, 1968719665U, 42656370U, 1010790699U, - 561600615U, 2422453992U, 3082197735U, 1636700484U, 3977715296U, - 3125350482U, 3478021514U, 2227819446U, 1540868045U, 3061908980U, - 1087362407U, 3625200291U, 361937537U, 580441897U, 1520043666U, - 2270875402U, 1009161260U, 2502355842U, 4278769785U, 473902412U, - 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U, - 1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, - 674112918U, 195425752U, 3917890095U, 1874364234U, 1837892715U, - 3663478166U, 1548892014U, 2570748714U, 2049929836U, 2167029704U, - 697543767U, 3499545023U, 3342496315U, 1725251190U, 3561387469U, - 2905606616U, 1580182447U, 3934525927U, 4103172792U, 1365672522U, - 1534795737U, 3308667416U, 2841911405U, 3943182730U, 4072020313U, - 3494770452U, 3332626671U, 55327267U, 478030603U, 411080625U, - 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U, - 2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, - 202895590U, 1609122645U, 1267651008U, 2910315509U, 
2511475445U, - 2477423819U, 3932081579U, 900879979U, 2145588390U, 2670007504U, - 580819444U, 1864996828U, 2526325979U, 1019124258U, 815508628U, - 2765933989U, 1277301341U, 3006021786U, 855540956U, 288025710U, - 1919594237U, 2331223864U, 177452412U, 2475870369U, 2689291749U, - 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U, - 1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U, - 2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, - 1678115244U, 2699839832U, 3651968520U, 3521595541U, 458433303U, - 2423096824U, 21831741U, 380011703U, 2498168716U, 861806087U, - 1673574843U, 4188794405U, 2520563651U, 2632279153U, 2170465525U, - 4171949898U, 3886039621U, 1661344005U, 3424285243U, 992588372U, - 2500984144U, 2993248497U, 3590193895U, 1535327365U, 515645636U, - 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U, - 1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, - 2501401703U, 4285518317U, 3794656178U, 955526526U, 3442142820U, - 3970298374U, 736025417U, 2737370764U, 1271509744U, 440570731U, - 136141826U, 1596189518U, 923399175U, 257541519U, 3505774281U, - 2194358432U, 2518162991U, 1379893637U, 2667767062U, 3748146247U, - 1821712620U, 3923161384U, 1947811444U, 2392527197U, 4127419685U, - 1423694998U, 4156576871U, 1382885582U, 3420127279U, 3617499534U, - 2994377493U, 4038063986U, 1918458672U, 2983166794U, 4200449033U, - 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U, - 2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, - 2491531140U, 4159725633U, 2272075455U, 759298618U, 201384554U, - 838356250U, 1416268324U, 674476934U, 90795364U, 141672229U, - 3660399588U, 4196417251U, 3249270244U, 3774530247U, 59587265U, - 3683164208U, 19392575U, 1463123697U, 1882205379U, 293780489U, - 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U, - 2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, - 2987802540U, 641492617U, 2575442710U, 4217822703U, 3271835300U, - 2836418300U, 
3739921620U, 2138378768U, 2879771855U, 4294903423U, - 3121097946U, 2603440486U, 2560820391U, 1012930944U, 2313499967U, - 584489368U, 3431165766U, 897384869U, 2062537737U, 2847889234U, - 3742362450U, 2951174585U, 4204621084U, 1109373893U, 3668075775U, - 2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U -}; -static const uint64_t init_gen_rand_64_expected[] = { - KQU(16924766246869039260), KQU( 8201438687333352714), - KQU( 2265290287015001750), KQU(18397264611805473832), - KQU( 3375255223302384358), KQU( 6345559975416828796), - KQU(18229739242790328073), KQU( 7596792742098800905), - KQU( 255338647169685981), KQU( 2052747240048610300), - KQU(18328151576097299343), KQU(12472905421133796567), - KQU(11315245349717600863), KQU(16594110197775871209), - KQU(15708751964632456450), KQU(10452031272054632535), - KQU(11097646720811454386), KQU( 4556090668445745441), - KQU(17116187693090663106), KQU(14931526836144510645), - KQU( 9190752218020552591), KQU( 9625800285771901401), - KQU(13995141077659972832), KQU( 5194209094927829625), - KQU( 4156788379151063303), KQU( 8523452593770139494), - KQU(14082382103049296727), KQU( 2462601863986088483), - KQU( 3030583461592840678), KQU( 5221622077872827681), - KQU( 3084210671228981236), KQU(13956758381389953823), - KQU(13503889856213423831), KQU(15696904024189836170), - KQU( 4612584152877036206), KQU( 6231135538447867881), - KQU(10172457294158869468), KQU( 6452258628466708150), - KQU(14044432824917330221), KQU( 370168364480044279), - KQU(10102144686427193359), KQU( 667870489994776076), - KQU( 2732271956925885858), KQU(18027788905977284151), - KQU(15009842788582923859), KQU( 7136357960180199542), - KQU(15901736243475578127), KQU(16951293785352615701), - KQU(10551492125243691632), KQU(17668869969146434804), - KQU(13646002971174390445), KQU( 9804471050759613248), - KQU( 5511670439655935493), KQU(18103342091070400926), - KQU(17224512747665137533), KQU(15534627482992618168), - KQU( 1423813266186582647), KQU(15821176807932930024), - 
KQU( 30323369733607156), KQU(11599382494723479403), - KQU( 653856076586810062), KQU( 3176437395144899659), - KQU(14028076268147963917), KQU(16156398271809666195), - KQU( 3166955484848201676), KQU( 5746805620136919390), - KQU(17297845208891256593), KQU(11691653183226428483), - KQU(17900026146506981577), KQU(15387382115755971042), - KQU(16923567681040845943), KQU( 8039057517199388606), - KQU(11748409241468629263), KQU( 794358245539076095), - KQU(13438501964693401242), KQU(14036803236515618962), - KQU( 5252311215205424721), KQU(17806589612915509081), - KQU( 6802767092397596006), KQU(14212120431184557140), - KQU( 1072951366761385712), KQU(13098491780722836296), - KQU( 9466676828710797353), KQU(12673056849042830081), - KQU(12763726623645357580), KQU(16468961652999309493), - KQU(15305979875636438926), KQU(17444713151223449734), - KQU( 5692214267627883674), KQU(13049589139196151505), - KQU( 880115207831670745), KQU( 1776529075789695498), - KQU(16695225897801466485), KQU(10666901778795346845), - KQU( 6164389346722833869), KQU( 2863817793264300475), - KQU( 9464049921886304754), KQU( 3993566636740015468), - KQU( 9983749692528514136), KQU(16375286075057755211), - KQU(16042643417005440820), KQU(11445419662923489877), - KQU( 7999038846885158836), KQU( 6721913661721511535), - KQU( 5363052654139357320), KQU( 1817788761173584205), - KQU(13290974386445856444), KQU( 4650350818937984680), - KQU( 8219183528102484836), KQU( 1569862923500819899), - KQU( 4189359732136641860), KQU(14202822961683148583), - KQU( 4457498315309429058), KQU(13089067387019074834), - KQU(11075517153328927293), KQU(10277016248336668389), - KQU( 7070509725324401122), KQU(17808892017780289380), - KQU(13143367339909287349), KQU( 1377743745360085151), - KQU( 5749341807421286485), KQU(14832814616770931325), - KQU( 7688820635324359492), KQU(10960474011539770045), - KQU( 81970066653179790), KQU(12619476072607878022), - KQU( 4419566616271201744), KQU(15147917311750568503), - KQU( 5549739182852706345), KQU( 
7308198397975204770), - KQU(13580425496671289278), KQU(17070764785210130301), - KQU( 8202832846285604405), KQU( 6873046287640887249), - KQU( 6927424434308206114), KQU( 6139014645937224874), - KQU(10290373645978487639), KQU(15904261291701523804), - KQU( 9628743442057826883), KQU(18383429096255546714), - KQU( 4977413265753686967), KQU( 7714317492425012869), - KQU( 9025232586309926193), KQU(14627338359776709107), - KQU(14759849896467790763), KQU(10931129435864423252), - KQU( 4588456988775014359), KQU(10699388531797056724), - KQU( 468652268869238792), KQU( 5755943035328078086), - KQU( 2102437379988580216), KQU( 9986312786506674028), - KQU( 2654207180040945604), KQU( 8726634790559960062), - KQU( 100497234871808137), KQU( 2800137176951425819), - KQU( 6076627612918553487), KQU( 5780186919186152796), - KQU( 8179183595769929098), KQU( 6009426283716221169), - KQU( 2796662551397449358), KQU( 1756961367041986764), - KQU( 6972897917355606205), KQU(14524774345368968243), - KQU( 2773529684745706940), KQU( 4853632376213075959), - KQU( 4198177923731358102), KQU( 8271224913084139776), - KQU( 2741753121611092226), KQU(16782366145996731181), - KQU(15426125238972640790), KQU(13595497100671260342), - KQU( 3173531022836259898), KQU( 6573264560319511662), - KQU(18041111951511157441), KQU( 2351433581833135952), - KQU( 3113255578908173487), KQU( 1739371330877858784), - KQU(16046126562789165480), KQU( 8072101652214192925), - KQU(15267091584090664910), KQU( 9309579200403648940), - KQU( 5218892439752408722), KQU(14492477246004337115), - KQU(17431037586679770619), KQU( 7385248135963250480), - KQU( 9580144956565560660), KQU( 4919546228040008720), - KQU(15261542469145035584), KQU(18233297270822253102), - KQU( 5453248417992302857), KQU( 9309519155931460285), - KQU(10342813012345291756), KQU(15676085186784762381), - KQU(15912092950691300645), KQU( 9371053121499003195), - KQU( 9897186478226866746), KQU(14061858287188196327), - KQU( 122575971620788119), KQU(12146750969116317754), - KQU( 
4438317272813245201), KQU( 8332576791009527119), - KQU(13907785691786542057), KQU(10374194887283287467), - KQU( 2098798755649059566), KQU( 3416235197748288894), - KQU( 8688269957320773484), KQU( 7503964602397371571), - KQU(16724977015147478236), KQU( 9461512855439858184), - KQU(13259049744534534727), KQU( 3583094952542899294), - KQU( 8764245731305528292), KQU(13240823595462088985), - KQU(13716141617617910448), KQU(18114969519935960955), - KQU( 2297553615798302206), KQU( 4585521442944663362), - KQU(17776858680630198686), KQU( 4685873229192163363), - KQU( 152558080671135627), KQU(15424900540842670088), - KQU(13229630297130024108), KQU(17530268788245718717), - KQU(16675633913065714144), KQU( 3158912717897568068), - KQU(15399132185380087288), KQU( 7401418744515677872), - KQU(13135412922344398535), KQU( 6385314346100509511), - KQU(13962867001134161139), KQU(10272780155442671999), - KQU(12894856086597769142), KQU(13340877795287554994), - KQU(12913630602094607396), KQU(12543167911119793857), - KQU(17343570372251873096), KQU(10959487764494150545), - KQU( 6966737953093821128), KQU(13780699135496988601), - KQU( 4405070719380142046), KQU(14923788365607284982), - KQU( 2869487678905148380), KQU( 6416272754197188403), - KQU(15017380475943612591), KQU( 1995636220918429487), - KQU( 3402016804620122716), KQU(15800188663407057080), - KQU(11362369990390932882), KQU(15262183501637986147), - KQU(10239175385387371494), KQU( 9352042420365748334), - KQU( 1682457034285119875), KQU( 1724710651376289644), - KQU( 2038157098893817966), KQU( 9897825558324608773), - KQU( 1477666236519164736), KQU(16835397314511233640), - KQU(10370866327005346508), KQU(10157504370660621982), - KQU(12113904045335882069), KQU(13326444439742783008), - KQU(11302769043000765804), KQU(13594979923955228484), - KQU(11779351762613475968), KQU( 3786101619539298383), - KQU( 8021122969180846063), KQU(15745904401162500495), - KQU(10762168465993897267), KQU(13552058957896319026), - KQU(11200228655252462013), KQU( 
5035370357337441226), - KQU( 7593918984545500013), KQU( 5418554918361528700), - KQU( 4858270799405446371), KQU( 9974659566876282544), - KQU(18227595922273957859), KQU( 2772778443635656220), - KQU(14285143053182085385), KQU( 9939700992429600469), - KQU(12756185904545598068), KQU( 2020783375367345262), - KQU( 57026775058331227), KQU( 950827867930065454), - KQU( 6602279670145371217), KQU( 2291171535443566929), - KQU( 5832380724425010313), KQU( 1220343904715982285), - KQU(17045542598598037633), KQU(15460481779702820971), - KQU(13948388779949365130), KQU(13975040175430829518), - KQU(17477538238425541763), KQU(11104663041851745725), - KQU(15860992957141157587), KQU(14529434633012950138), - KQU( 2504838019075394203), KQU( 7512113882611121886), - KQU( 4859973559980886617), KQU( 1258601555703250219), - KQU(15594548157514316394), KQU( 4516730171963773048), - KQU(11380103193905031983), KQU( 6809282239982353344), - KQU(18045256930420065002), KQU( 2453702683108791859), - KQU( 977214582986981460), KQU( 2006410402232713466), - KQU( 6192236267216378358), KQU( 3429468402195675253), - KQU(18146933153017348921), KQU(17369978576367231139), - KQU( 1246940717230386603), KQU(11335758870083327110), - KQU(14166488801730353682), KQU( 9008573127269635732), - KQU(10776025389820643815), KQU(15087605441903942962), - KQU( 1359542462712147922), KQU(13898874411226454206), - KQU(17911176066536804411), KQU( 9435590428600085274), - KQU( 294488509967864007), KQU( 8890111397567922046), - KQU( 7987823476034328778), KQU(13263827582440967651), - KQU( 7503774813106751573), KQU(14974747296185646837), - KQU( 8504765037032103375), KQU(17340303357444536213), - KQU( 7704610912964485743), KQU( 8107533670327205061), - KQU( 9062969835083315985), KQU(16968963142126734184), - KQU(12958041214190810180), KQU( 2720170147759570200), - KQU( 2986358963942189566), KQU(14884226322219356580), - KQU( 286224325144368520), KQU(11313800433154279797), - KQU(18366849528439673248), KQU(17899725929482368789), - KQU( 
3730004284609106799), KQU( 1654474302052767205), - KQU( 5006698007047077032), KQU( 8196893913601182838), - KQU(15214541774425211640), KQU(17391346045606626073), - KQU( 8369003584076969089), KQU( 3939046733368550293), - KQU(10178639720308707785), KQU( 2180248669304388697), - KQU( 62894391300126322), KQU( 9205708961736223191), - KQU( 6837431058165360438), KQU( 3150743890848308214), - KQU(17849330658111464583), KQU(12214815643135450865), - KQU(13410713840519603402), KQU( 3200778126692046802), - KQU(13354780043041779313), KQU( 800850022756886036), - KQU(15660052933953067433), KQU( 6572823544154375676), - KQU(11030281857015819266), KQU(12682241941471433835), - KQU(11654136407300274693), KQU( 4517795492388641109), - KQU( 9757017371504524244), KQU(17833043400781889277), - KQU(12685085201747792227), KQU(10408057728835019573), - KQU( 98370418513455221), KQU( 6732663555696848598), - KQU(13248530959948529780), KQU( 3530441401230622826), - KQU(18188251992895660615), KQU( 1847918354186383756), - KQU( 1127392190402660921), KQU(11293734643143819463), - KQU( 3015506344578682982), KQU(13852645444071153329), - KQU( 2121359659091349142), KQU( 1294604376116677694), - KQU( 5616576231286352318), KQU( 7112502442954235625), - KQU(11676228199551561689), KQU(12925182803007305359), - KQU( 7852375518160493082), KQU( 1136513130539296154), - KQU( 5636923900916593195), KQU( 3221077517612607747), - KQU(17784790465798152513), KQU( 3554210049056995938), - KQU(17476839685878225874), KQU( 3206836372585575732), - KQU( 2765333945644823430), KQU(10080070903718799528), - KQU( 5412370818878286353), KQU( 9689685887726257728), - KQU( 8236117509123533998), KQU( 1951139137165040214), - KQU( 4492205209227980349), KQU(16541291230861602967), - KQU( 1424371548301437940), KQU( 9117562079669206794), - KQU(14374681563251691625), KQU(13873164030199921303), - KQU( 6680317946770936731), KQU(15586334026918276214), - KQU(10896213950976109802), KQU( 9506261949596413689), - KQU( 9903949574308040616), KQU( 
6038397344557204470), - KQU( 174601465422373648), KQU(15946141191338238030), - KQU(17142225620992044937), KQU( 7552030283784477064), - KQU( 2947372384532947997), KQU( 510797021688197711), - KQU( 4962499439249363461), KQU( 23770320158385357), - KQU( 959774499105138124), KQU( 1468396011518788276), - KQU( 2015698006852312308), KQU( 4149400718489980136), - KQU( 5992916099522371188), KQU(10819182935265531076), - KQU(16189787999192351131), KQU( 342833961790261950), - KQU(12470830319550495336), KQU(18128495041912812501), - KQU( 1193600899723524337), KQU( 9056793666590079770), - KQU( 2154021227041669041), KQU( 4963570213951235735), - KQU( 4865075960209211409), KQU( 2097724599039942963), - KQU( 2024080278583179845), KQU(11527054549196576736), - KQU(10650256084182390252), KQU( 4808408648695766755), - KQU( 1642839215013788844), KQU(10607187948250398390), - KQU( 7076868166085913508), KQU( 730522571106887032), - KQU(12500579240208524895), KQU( 4484390097311355324), - KQU(15145801330700623870), KQU( 8055827661392944028), - KQU( 5865092976832712268), KQU(15159212508053625143), - KQU( 3560964582876483341), KQU( 4070052741344438280), - KQU( 6032585709886855634), KQU(15643262320904604873), - KQU( 2565119772293371111), KQU( 318314293065348260), - KQU(15047458749141511872), KQU( 7772788389811528730), - KQU( 7081187494343801976), KQU( 6465136009467253947), - KQU(10425940692543362069), KQU( 554608190318339115), - KQU(14796699860302125214), KQU( 1638153134431111443), - KQU(10336967447052276248), KQU( 8412308070396592958), - KQU( 4004557277152051226), KQU( 8143598997278774834), - KQU(16413323996508783221), KQU(13139418758033994949), - KQU( 9772709138335006667), KQU( 2818167159287157659), - KQU(17091740573832523669), KQU(14629199013130751608), - KQU(18268322711500338185), KQU( 8290963415675493063), - KQU( 8830864907452542588), KQU( 1614839084637494849), - KQU(14855358500870422231), KQU( 3472996748392519937), - KQU(15317151166268877716), KQU( 5825895018698400362), - 
KQU(16730208429367544129), KQU(10481156578141202800), - KQU( 4746166512382823750), KQU(12720876014472464998), - KQU( 8825177124486735972), KQU(13733447296837467838), - KQU( 6412293741681359625), KQU( 8313213138756135033), - KQU(11421481194803712517), KQU( 7997007691544174032), - KQU( 6812963847917605930), KQU( 9683091901227558641), - KQU(14703594165860324713), KQU( 1775476144519618309), - KQU( 2724283288516469519), KQU( 717642555185856868), - KQU( 8736402192215092346), KQU(11878800336431381021), - KQU( 4348816066017061293), KQU( 6115112756583631307), - KQU( 9176597239667142976), KQU(12615622714894259204), - KQU(10283406711301385987), KQU( 5111762509485379420), - KQU( 3118290051198688449), KQU( 7345123071632232145), - KQU( 9176423451688682359), KQU( 4843865456157868971), - KQU(12008036363752566088), KQU(12058837181919397720), - KQU( 2145073958457347366), KQU( 1526504881672818067), - KQU( 3488830105567134848), KQU(13208362960674805143), - KQU( 4077549672899572192), KQU( 7770995684693818365), - KQU( 1398532341546313593), KQU(12711859908703927840), - KQU( 1417561172594446813), KQU(17045191024194170604), - KQU( 4101933177604931713), KQU(14708428834203480320), - KQU(17447509264469407724), KQU(14314821973983434255), - KQU(17990472271061617265), KQU( 5087756685841673942), - KQU(12797820586893859939), KQU( 1778128952671092879), - KQU( 3535918530508665898), KQU( 9035729701042481301), - KQU(14808661568277079962), KQU(14587345077537747914), - KQU(11920080002323122708), KQU( 6426515805197278753), - KQU( 3295612216725984831), KQU(11040722532100876120), - KQU(12305952936387598754), KQU(16097391899742004253), - KQU( 4908537335606182208), KQU(12446674552196795504), - KQU(16010497855816895177), KQU( 9194378874788615551), - KQU( 3382957529567613384), KQU( 5154647600754974077), - KQU( 9801822865328396141), KQU( 9023662173919288143), - KQU(17623115353825147868), KQU( 8238115767443015816), - KQU(15811444159859002560), KQU( 9085612528904059661), - KQU( 6888601089398614254), KQU( 
258252992894160189), - KQU( 6704363880792428622), KQU( 6114966032147235763), - KQU(11075393882690261875), KQU( 8797664238933620407), - KQU( 5901892006476726920), KQU( 5309780159285518958), - KQU(14940808387240817367), KQU(14642032021449656698), - KQU( 9808256672068504139), KQU( 3670135111380607658), - KQU(11211211097845960152), KQU( 1474304506716695808), - KQU(15843166204506876239), KQU( 7661051252471780561), - KQU(10170905502249418476), KQU( 7801416045582028589), - KQU( 2763981484737053050), KQU( 9491377905499253054), - KQU(16201395896336915095), KQU( 9256513756442782198), - KQU( 5411283157972456034), KQU( 5059433122288321676), - KQU( 4327408006721123357), KQU( 9278544078834433377), - KQU( 7601527110882281612), KQU(11848295896975505251), - KQU(12096998801094735560), KQU(14773480339823506413), - KQU(15586227433895802149), KQU(12786541257830242872), - KQU( 6904692985140503067), KQU( 5309011515263103959), - KQU(12105257191179371066), KQU(14654380212442225037), - KQU( 2556774974190695009), KQU( 4461297399927600261), - KQU(14888225660915118646), KQU(14915459341148291824), - KQU( 2738802166252327631), KQU( 6047155789239131512), - KQU(12920545353217010338), KQU(10697617257007840205), - KQU( 2751585253158203504), KQU(13252729159780047496), - KQU(14700326134672815469), KQU(14082527904374600529), - KQU(16852962273496542070), KQU(17446675504235853907), - KQU(15019600398527572311), KQU(12312781346344081551), - KQU(14524667935039810450), KQU( 5634005663377195738), - KQU(11375574739525000569), KQU( 2423665396433260040), - KQU( 5222836914796015410), KQU( 4397666386492647387), - KQU( 4619294441691707638), KQU( 665088602354770716), - KQU(13246495665281593610), KQU( 6564144270549729409), - KQU(10223216188145661688), KQU( 3961556907299230585), - KQU(11543262515492439914), KQU(16118031437285993790), - KQU( 7143417964520166465), KQU(13295053515909486772), - KQU( 40434666004899675), KQU(17127804194038347164), - KQU( 8599165966560586269), KQU( 8214016749011284903), - 
KQU(13725130352140465239), KQU( 5467254474431726291), - KQU( 7748584297438219877), KQU(16933551114829772472), - KQU( 2169618439506799400), KQU( 2169787627665113463), - KQU(17314493571267943764), KQU(18053575102911354912), - KQU(11928303275378476973), KQU(11593850925061715550), - KQU(17782269923473589362), KQU( 3280235307704747039), - KQU( 6145343578598685149), KQU(17080117031114086090), - KQU(18066839902983594755), KQU( 6517508430331020706), - KQU( 8092908893950411541), KQU(12558378233386153732), - KQU( 4476532167973132976), KQU(16081642430367025016), - KQU( 4233154094369139361), KQU( 8693630486693161027), - KQU(11244959343027742285), KQU(12273503967768513508), - KQU(14108978636385284876), KQU( 7242414665378826984), - KQU( 6561316938846562432), KQU( 8601038474994665795), - KQU(17532942353612365904), KQU(17940076637020912186), - KQU( 7340260368823171304), KQU( 7061807613916067905), - KQU(10561734935039519326), KQU(17990796503724650862), - KQU( 6208732943911827159), KQU( 359077562804090617), - KQU(14177751537784403113), KQU(10659599444915362902), - KQU(15081727220615085833), KQU(13417573895659757486), - KQU(15513842342017811524), KQU(11814141516204288231), - KQU( 1827312513875101814), KQU( 2804611699894603103), - KQU(17116500469975602763), KQU(12270191815211952087), - KQU(12256358467786024988), KQU(18435021722453971267), - KQU( 671330264390865618), KQU( 476504300460286050), - KQU(16465470901027093441), KQU( 4047724406247136402), - KQU( 1322305451411883346), KQU( 1388308688834322280), - KQU( 7303989085269758176), KQU( 9323792664765233642), - KQU( 4542762575316368936), KQU(17342696132794337618), - KQU( 4588025054768498379), KQU(13415475057390330804), - KQU(17880279491733405570), KQU(10610553400618620353), - KQU( 3180842072658960139), KQU(13002966655454270120), - KQU( 1665301181064982826), KQU( 7083673946791258979), - KQU( 190522247122496820), KQU(17388280237250677740), - KQU( 8430770379923642945), KQU(12987180971921668584), - KQU( 2311086108365390642), KQU( 
2870984383579822345), - KQU(14014682609164653318), KQU(14467187293062251484), - KQU( 192186361147413298), KQU(15171951713531796524), - KQU( 9900305495015948728), KQU(17958004775615466344), - KQU(14346380954498606514), KQU(18040047357617407096), - KQU( 5035237584833424532), KQU(15089555460613972287), - KQU( 4131411873749729831), KQU( 1329013581168250330), - KQU(10095353333051193949), KQU(10749518561022462716), - KQU( 9050611429810755847), KQU(15022028840236655649), - KQU( 8775554279239748298), KQU(13105754025489230502), - KQU(15471300118574167585), KQU( 89864764002355628), - KQU( 8776416323420466637), KQU( 5280258630612040891), - KQU( 2719174488591862912), KQU( 7599309137399661994), - KQU(15012887256778039979), KQU(14062981725630928925), - KQU(12038536286991689603), KQU( 7089756544681775245), - KQU(10376661532744718039), KQU( 1265198725901533130), - KQU(13807996727081142408), KQU( 2935019626765036403), - KQU( 7651672460680700141), KQU( 3644093016200370795), - KQU( 2840982578090080674), KQU(17956262740157449201), - KQU(18267979450492880548), KQU(11799503659796848070), - KQU( 9942537025669672388), KQU(11886606816406990297), - KQU( 5488594946437447576), KQU( 7226714353282744302), - KQU( 3784851653123877043), KQU( 878018453244803041), - KQU(12110022586268616085), KQU( 734072179404675123), - KQU(11869573627998248542), KQU( 469150421297783998), - KQU( 260151124912803804), KQU(11639179410120968649), - KQU( 9318165193840846253), KQU(12795671722734758075), - KQU(15318410297267253933), KQU( 691524703570062620), - KQU( 5837129010576994601), KQU(15045963859726941052), - KQU( 5850056944932238169), KQU(12017434144750943807), - KQU( 7447139064928956574), KQU( 3101711812658245019), - KQU(16052940704474982954), KQU(18195745945986994042), - KQU( 8932252132785575659), KQU(13390817488106794834), - KQU(11582771836502517453), KQU( 4964411326683611686), - KQU( 2195093981702694011), KQU(14145229538389675669), - KQU(16459605532062271798), KQU( 866316924816482864), - KQU( 
4593041209937286377), KQU( 8415491391910972138), - KQU( 4171236715600528969), KQU(16637569303336782889), - KQU( 2002011073439212680), KQU(17695124661097601411), - KQU( 4627687053598611702), KQU( 7895831936020190403), - KQU( 8455951300917267802), KQU( 2923861649108534854), - KQU( 8344557563927786255), KQU( 6408671940373352556), - KQU(12210227354536675772), KQU(14294804157294222295), - KQU(10103022425071085127), KQU(10092959489504123771), - KQU( 6554774405376736268), KQU(12629917718410641774), - KQU( 6260933257596067126), KQU( 2460827021439369673), - KQU( 2541962996717103668), KQU( 597377203127351475), - KQU( 5316984203117315309), KQU( 4811211393563241961), - KQU(13119698597255811641), KQU( 8048691512862388981), - KQU(10216818971194073842), KQU( 4612229970165291764), - KQU(10000980798419974770), KQU( 6877640812402540687), - KQU( 1488727563290436992), KQU( 2227774069895697318), - KQU(11237754507523316593), KQU(13478948605382290972), - KQU( 1963583846976858124), KQU( 5512309205269276457), - KQU( 3972770164717652347), KQU( 3841751276198975037), - KQU(10283343042181903117), KQU( 8564001259792872199), - KQU(16472187244722489221), KQU( 8953493499268945921), - KQU( 3518747340357279580), KQU( 4003157546223963073), - KQU( 3270305958289814590), KQU( 3966704458129482496), - KQU( 8122141865926661939), KQU(14627734748099506653), - KQU(13064426990862560568), KQU( 2414079187889870829), - KQU( 5378461209354225306), KQU(10841985740128255566), - KQU( 538582442885401738), KQU( 7535089183482905946), - KQU(16117559957598879095), KQU( 8477890721414539741), - KQU( 1459127491209533386), KQU(17035126360733620462), - KQU( 8517668552872379126), KQU(10292151468337355014), - KQU(17081267732745344157), KQU(13751455337946087178), - KQU(14026945459523832966), KQU( 6653278775061723516), - KQU(10619085543856390441), KQU( 2196343631481122885), - KQU(10045966074702826136), KQU(10082317330452718282), - KQU( 5920859259504831242), KQU( 9951879073426540617), - KQU( 7074696649151414158), 
KQU(15808193543879464318), - KQU( 7385247772746953374), KQU( 3192003544283864292), - KQU(18153684490917593847), KQU(12423498260668568905), - KQU(10957758099756378169), KQU(11488762179911016040), - KQU( 2099931186465333782), KQU(11180979581250294432), - KQU( 8098916250668367933), KQU( 3529200436790763465), - KQU(12988418908674681745), KQU( 6147567275954808580), - KQU( 3207503344604030989), KQU(10761592604898615360), - KQU( 229854861031893504), KQU( 8809853962667144291), - KQU(13957364469005693860), KQU( 7634287665224495886), - KQU(12353487366976556874), KQU( 1134423796317152034), - KQU( 2088992471334107068), KQU( 7393372127190799698), - KQU( 1845367839871058391), KQU( 207922563987322884), - KQU(11960870813159944976), KQU(12182120053317317363), - KQU(17307358132571709283), KQU(13871081155552824936), - KQU(18304446751741566262), KQU( 7178705220184302849), - KQU(10929605677758824425), KQU(16446976977835806844), - KQU(13723874412159769044), KQU( 6942854352100915216), - KQU( 1726308474365729390), KQU( 2150078766445323155), - KQU(15345558947919656626), KQU(12145453828874527201), - KQU( 2054448620739726849), KQU( 2740102003352628137), - KQU(11294462163577610655), KQU( 756164283387413743), - KQU(17841144758438810880), KQU(10802406021185415861), - KQU( 8716455530476737846), KQU( 6321788834517649606), - KQU(14681322910577468426), KQU(17330043563884336387), - KQU(12701802180050071614), KQU(14695105111079727151), - KQU( 5112098511654172830), KQU( 4957505496794139973), - KQU( 8270979451952045982), KQU(12307685939199120969), - KQU(12425799408953443032), KQU( 8376410143634796588), - KQU(16621778679680060464), KQU( 3580497854566660073), - KQU( 1122515747803382416), KQU( 857664980960597599), - KQU( 6343640119895925918), KQU(12878473260854462891), - KQU(10036813920765722626), KQU(14451335468363173812), - KQU( 5476809692401102807), KQU(16442255173514366342), - KQU(13060203194757167104), KQU(14354124071243177715), - KQU(15961249405696125227), KQU(13703893649690872584), - KQU( 
363907326340340064), KQU( 6247455540491754842), - KQU(12242249332757832361), KQU( 156065475679796717), - KQU( 9351116235749732355), KQU( 4590350628677701405), - KQU( 1671195940982350389), KQU(13501398458898451905), - KQU( 6526341991225002255), KQU( 1689782913778157592), - KQU( 7439222350869010334), KQU(13975150263226478308), - KQU(11411961169932682710), KQU(17204271834833847277), - KQU( 541534742544435367), KQU( 6591191931218949684), - KQU( 2645454775478232486), KQU( 4322857481256485321), - KQU( 8477416487553065110), KQU(12902505428548435048), - KQU( 971445777981341415), KQU(14995104682744976712), - KQU( 4243341648807158063), KQU( 8695061252721927661), - KQU( 5028202003270177222), KQU( 2289257340915567840), - KQU(13870416345121866007), KQU(13994481698072092233), - KQU( 6912785400753196481), KQU( 2278309315841980139), - KQU( 4329765449648304839), KQU( 5963108095785485298), - KQU( 4880024847478722478), KQU(16015608779890240947), - KQU( 1866679034261393544), KQU( 914821179919731519), - KQU( 9643404035648760131), KQU( 2418114953615593915), - KQU( 944756836073702374), KQU(15186388048737296834), - KQU( 7723355336128442206), KQU( 7500747479679599691), - KQU(18013961306453293634), KQU( 2315274808095756456), - KQU(13655308255424029566), KQU(17203800273561677098), - KQU( 1382158694422087756), KQU( 5090390250309588976), - KQU( 517170818384213989), KQU( 1612709252627729621), - KQU( 1330118955572449606), KQU( 300922478056709885), - KQU(18115693291289091987), KQU(13491407109725238321), - KQU(15293714633593827320), KQU( 5151539373053314504), - KQU( 5951523243743139207), KQU(14459112015249527975), - KQU( 5456113959000700739), KQU( 3877918438464873016), - KQU(12534071654260163555), KQU(15871678376893555041), - KQU(11005484805712025549), KQU(16353066973143374252), - KQU( 4358331472063256685), KQU( 8268349332210859288), - KQU(12485161590939658075), KQU(13955993592854471343), - KQU( 5911446886848367039), KQU(14925834086813706974), - KQU( 6590362597857994805), KQU( 
1280544923533661875), - KQU( 1637756018947988164), KQU( 4734090064512686329), - KQU(16693705263131485912), KQU( 6834882340494360958), - KQU( 8120732176159658505), KQU( 2244371958905329346), - KQU(10447499707729734021), KQU( 7318742361446942194), - KQU( 8032857516355555296), KQU(14023605983059313116), - KQU( 1032336061815461376), KQU( 9840995337876562612), - KQU( 9869256223029203587), KQU(12227975697177267636), - KQU(12728115115844186033), KQU( 7752058479783205470), - KQU( 729733219713393087), KQU(12954017801239007622) -}; -static const uint64_t init_by_array_64_expected[] = { - KQU( 2100341266307895239), KQU( 8344256300489757943), - KQU(15687933285484243894), KQU( 8268620370277076319), - KQU(12371852309826545459), KQU( 8800491541730110238), - KQU(18113268950100835773), KQU( 2886823658884438119), - KQU( 3293667307248180724), KQU( 9307928143300172731), - KQU( 7688082017574293629), KQU( 900986224735166665), - KQU( 9977972710722265039), KQU( 6008205004994830552), - KQU( 546909104521689292), KQU( 7428471521869107594), - KQU(14777563419314721179), KQU(16116143076567350053), - KQU( 5322685342003142329), KQU( 4200427048445863473), - KQU( 4693092150132559146), KQU(13671425863759338582), - KQU( 6747117460737639916), KQU( 4732666080236551150), - KQU( 5912839950611941263), KQU( 3903717554504704909), - KQU( 2615667650256786818), KQU(10844129913887006352), - KQU(13786467861810997820), KQU(14267853002994021570), - KQU(13767807302847237439), KQU(16407963253707224617), - KQU( 4802498363698583497), KQU( 2523802839317209764), - KQU( 3822579397797475589), KQU( 8950320572212130610), - KQU( 3745623504978342534), KQU(16092609066068482806), - KQU( 9817016950274642398), KQU(10591660660323829098), - KQU(11751606650792815920), KQU( 5122873818577122211), - KQU(17209553764913936624), KQU( 6249057709284380343), - KQU(15088791264695071830), KQU(15344673071709851930), - KQU( 4345751415293646084), KQU( 2542865750703067928), - KQU(13520525127852368784), KQU(18294188662880997241), - KQU( 
3871781938044881523), KQU( 2873487268122812184), - KQU(15099676759482679005), KQU(15442599127239350490), - KQU( 6311893274367710888), KQU( 3286118760484672933), - KQU( 4146067961333542189), KQU(13303942567897208770), - KQU( 8196013722255630418), KQU( 4437815439340979989), - KQU(15433791533450605135), KQU( 4254828956815687049), - KQU( 1310903207708286015), KQU(10529182764462398549), - KQU(14900231311660638810), KQU( 9727017277104609793), - KQU( 1821308310948199033), KQU(11628861435066772084), - KQU( 9469019138491546924), KQU( 3145812670532604988), - KQU( 9938468915045491919), KQU( 1562447430672662142), - KQU(13963995266697989134), KQU( 3356884357625028695), - KQU( 4499850304584309747), KQU( 8456825817023658122), - KQU(10859039922814285279), KQU( 8099512337972526555), - KQU( 348006375109672149), KQU(11919893998241688603), - KQU( 1104199577402948826), KQU(16689191854356060289), - KQU(10992552041730168078), KQU( 7243733172705465836), - KQU( 5668075606180319560), KQU(18182847037333286970), - KQU( 4290215357664631322), KQU( 4061414220791828613), - KQU(13006291061652989604), KQU( 7140491178917128798), - KQU(12703446217663283481), KQU( 5500220597564558267), - KQU(10330551509971296358), KQU(15958554768648714492), - KQU( 5174555954515360045), KQU( 1731318837687577735), - KQU( 3557700801048354857), KQU(13764012341928616198), - KQU(13115166194379119043), KQU( 7989321021560255519), - KQU( 2103584280905877040), KQU( 9230788662155228488), - KQU(16396629323325547654), KQU( 657926409811318051), - KQU(15046700264391400727), KQU( 5120132858771880830), - KQU( 7934160097989028561), KQU( 6963121488531976245), - KQU(17412329602621742089), KQU(15144843053931774092), - KQU(17204176651763054532), KQU(13166595387554065870), - KQU( 8590377810513960213), KQU( 5834365135373991938), - KQU( 7640913007182226243), KQU( 3479394703859418425), - KQU(16402784452644521040), KQU( 4993979809687083980), - KQU(13254522168097688865), KQU(15643659095244365219), - KQU( 5881437660538424982), 
KQU(11174892200618987379), - KQU( 254409966159711077), KQU(17158413043140549909), - KQU( 3638048789290376272), KQU( 1376816930299489190), - KQU( 4622462095217761923), KQU(15086407973010263515), - KQU(13253971772784692238), KQU( 5270549043541649236), - KQU(11182714186805411604), KQU(12283846437495577140), - KQU( 5297647149908953219), KQU(10047451738316836654), - KQU( 4938228100367874746), KQU(12328523025304077923), - KQU( 3601049438595312361), KQU( 9313624118352733770), - KQU(13322966086117661798), KQU(16660005705644029394), - KQU(11337677526988872373), KQU(13869299102574417795), - KQU(15642043183045645437), KQU( 3021755569085880019), - KQU( 4979741767761188161), KQU(13679979092079279587), - KQU( 3344685842861071743), KQU(13947960059899588104), - KQU( 305806934293368007), KQU( 5749173929201650029), - KQU(11123724852118844098), KQU(15128987688788879802), - KQU(15251651211024665009), KQU( 7689925933816577776), - KQU(16732804392695859449), KQU(17087345401014078468), - KQU(14315108589159048871), KQU( 4820700266619778917), - KQU(16709637539357958441), KQU( 4936227875177351374), - KQU( 2137907697912987247), KQU(11628565601408395420), - KQU( 2333250549241556786), KQU( 5711200379577778637), - KQU( 5170680131529031729), KQU(12620392043061335164), - KQU( 95363390101096078), KQU( 5487981914081709462), - KQU( 1763109823981838620), KQU( 3395861271473224396), - KQU( 1300496844282213595), KQU( 6894316212820232902), - KQU(10673859651135576674), KQU( 5911839658857903252), - KQU(17407110743387299102), KQU( 8257427154623140385), - KQU(11389003026741800267), KQU( 4070043211095013717), - KQU(11663806997145259025), KQU(15265598950648798210), - KQU( 630585789434030934), KQU( 3524446529213587334), - KQU( 7186424168495184211), KQU(10806585451386379021), - KQU(11120017753500499273), KQU( 1586837651387701301), - KQU(17530454400954415544), KQU( 9991670045077880430), - KQU( 7550997268990730180), KQU( 8640249196597379304), - KQU( 3522203892786893823), KQU(10401116549878854788), - 
KQU(13690285544733124852), KQU( 8295785675455774586), - KQU(15535716172155117603), KQU( 3112108583723722511), - KQU(17633179955339271113), KQU(18154208056063759375), - KQU( 1866409236285815666), KQU(13326075895396412882), - KQU( 8756261842948020025), KQU( 6281852999868439131), - KQU(15087653361275292858), KQU(10333923911152949397), - KQU( 5265567645757408500), KQU(12728041843210352184), - KQU( 6347959327507828759), KQU( 154112802625564758), - KQU(18235228308679780218), KQU( 3253805274673352418), - KQU( 4849171610689031197), KQU(17948529398340432518), - KQU(13803510475637409167), KQU(13506570190409883095), - KQU(15870801273282960805), KQU( 8451286481299170773), - KQU( 9562190620034457541), KQU( 8518905387449138364), - KQU(12681306401363385655), KQU( 3788073690559762558), - KQU( 5256820289573487769), KQU( 2752021372314875467), - KQU( 6354035166862520716), KQU( 4328956378309739069), - KQU( 449087441228269600), KQU( 5533508742653090868), - KQU( 1260389420404746988), KQU(18175394473289055097), - KQU( 1535467109660399420), KQU( 8818894282874061442), - KQU(12140873243824811213), KQU(15031386653823014946), - KQU( 1286028221456149232), KQU( 6329608889367858784), - KQU( 9419654354945132725), KQU( 6094576547061672379), - KQU(17706217251847450255), KQU( 1733495073065878126), - KQU(16918923754607552663), KQU( 8881949849954945044), - KQU(12938977706896313891), KQU(14043628638299793407), - KQU(18393874581723718233), KQU( 6886318534846892044), - KQU(14577870878038334081), KQU(13541558383439414119), - KQU(13570472158807588273), KQU(18300760537910283361), - KQU( 818368572800609205), KQU( 1417000585112573219), - KQU(12337533143867683655), KQU(12433180994702314480), - KQU( 778190005829189083), KQU(13667356216206524711), - KQU( 9866149895295225230), KQU(11043240490417111999), - KQU( 1123933826541378598), KQU( 6469631933605123610), - KQU(14508554074431980040), KQU(13918931242962026714), - KQU( 2870785929342348285), KQU(14786362626740736974), - KQU(13176680060902695786), KQU( 
9591778613541679456), - KQU( 9097662885117436706), KQU( 749262234240924947), - KQU( 1944844067793307093), KQU( 4339214904577487742), - KQU( 8009584152961946551), KQU(16073159501225501777), - KQU( 3335870590499306217), KQU(17088312653151202847), - KQU( 3108893142681931848), KQU(16636841767202792021), - KQU(10423316431118400637), KQU( 8008357368674443506), - KQU(11340015231914677875), KQU(17687896501594936090), - KQU(15173627921763199958), KQU( 542569482243721959), - KQU(15071714982769812975), KQU( 4466624872151386956), - KQU( 1901780715602332461), KQU( 9822227742154351098), - KQU( 1479332892928648780), KQU( 6981611948382474400), - KQU( 7620824924456077376), KQU(14095973329429406782), - KQU( 7902744005696185404), KQU(15830577219375036920), - KQU(10287076667317764416), KQU(12334872764071724025), - KQU( 4419302088133544331), KQU(14455842851266090520), - KQU(12488077416504654222), KQU( 7953892017701886766), - KQU( 6331484925529519007), KQU( 4902145853785030022), - KQU(17010159216096443073), KQU(11945354668653886087), - KQU(15112022728645230829), KQU(17363484484522986742), - KQU( 4423497825896692887), KQU( 8155489510809067471), - KQU( 258966605622576285), KQU( 5462958075742020534), - KQU( 6763710214913276228), KQU( 2368935183451109054), - KQU(14209506165246453811), KQU( 2646257040978514881), - KQU( 3776001911922207672), KQU( 1419304601390147631), - KQU(14987366598022458284), KQU( 3977770701065815721), - KQU( 730820417451838898), KQU( 3982991703612885327), - KQU( 2803544519671388477), KQU(17067667221114424649), - KQU( 2922555119737867166), KQU( 1989477584121460932), - KQU(15020387605892337354), KQU( 9293277796427533547), - KQU(10722181424063557247), KQU(16704542332047511651), - KQU( 5008286236142089514), KQU(16174732308747382540), - KQU(17597019485798338402), KQU(13081745199110622093), - KQU( 8850305883842258115), KQU(12723629125624589005), - KQU( 8140566453402805978), KQU(15356684607680935061), - KQU(14222190387342648650), KQU(11134610460665975178), - KQU( 
1259799058620984266), KQU(13281656268025610041), - KQU( 298262561068153992), KQU(12277871700239212922), - KQU(13911297774719779438), KQU(16556727962761474934), - KQU(17903010316654728010), KQU( 9682617699648434744), - KQU(14757681836838592850), KQU( 1327242446558524473), - KQU(11126645098780572792), KQU( 1883602329313221774), - KQU( 2543897783922776873), KQU(15029168513767772842), - KQU(12710270651039129878), KQU(16118202956069604504), - KQU(15010759372168680524), KQU( 2296827082251923948), - KQU(10793729742623518101), KQU(13829764151845413046), - KQU(17769301223184451213), KQU( 3118268169210783372), - KQU(17626204544105123127), KQU( 7416718488974352644), - KQU(10450751996212925994), KQU( 9352529519128770586), - KQU( 259347569641110140), KQU( 8048588892269692697), - KQU( 1774414152306494058), KQU(10669548347214355622), - KQU(13061992253816795081), KQU(18432677803063861659), - KQU( 8879191055593984333), KQU(12433753195199268041), - KQU(14919392415439730602), KQU( 6612848378595332963), - KQU( 6320986812036143628), KQU(10465592420226092859), - KQU( 4196009278962570808), KQU( 3747816564473572224), - KQU(17941203486133732898), KQU( 2350310037040505198), - KQU( 5811779859134370113), KQU(10492109599506195126), - KQU( 7699650690179541274), KQU( 1954338494306022961), - KQU(14095816969027231152), KQU( 5841346919964852061), - KQU(14945969510148214735), KQU( 3680200305887550992), - KQU( 6218047466131695792), KQU( 8242165745175775096), - KQU(11021371934053307357), KQU( 1265099502753169797), - KQU( 4644347436111321718), KQU( 3609296916782832859), - KQU( 8109807992218521571), KQU(18387884215648662020), - KQU(14656324896296392902), KQU(17386819091238216751), - KQU(17788300878582317152), KQU( 7919446259742399591), - KQU( 4466613134576358004), KQU(12928181023667938509), - KQU(13147446154454932030), KQU(16552129038252734620), - KQU( 8395299403738822450), KQU(11313817655275361164), - KQU( 434258809499511718), KQU( 2074882104954788676), - KQU( 7929892178759395518), KQU( 
9006461629105745388), - KQU( 5176475650000323086), KQU(11128357033468341069), - KQU(12026158851559118955), KQU(14699716249471156500), - KQU( 448982497120206757), KQU( 4156475356685519900), - KQU( 6063816103417215727), KQU(10073289387954971479), - KQU( 8174466846138590962), KQU( 2675777452363449006), - KQU( 9090685420572474281), KQU( 6659652652765562060), - KQU(12923120304018106621), KQU(11117480560334526775), - KQU( 937910473424587511), KQU( 1838692113502346645), - KQU(11133914074648726180), KQU( 7922600945143884053), - KQU(13435287702700959550), KQU( 5287964921251123332), - KQU(11354875374575318947), KQU(17955724760748238133), - KQU(13728617396297106512), KQU( 4107449660118101255), - KQU( 1210269794886589623), KQU(11408687205733456282), - KQU( 4538354710392677887), KQU(13566803319341319267), - KQU(17870798107734050771), KQU( 3354318982568089135), - KQU( 9034450839405133651), KQU(13087431795753424314), - KQU( 950333102820688239), KQU( 1968360654535604116), - KQU(16840551645563314995), KQU( 8867501803892924995), - KQU(11395388644490626845), KQU( 1529815836300732204), - KQU(13330848522996608842), KQU( 1813432878817504265), - KQU( 2336867432693429560), KQU(15192805445973385902), - KQU( 2528593071076407877), KQU( 128459777936689248), - KQU( 9976345382867214866), KQU( 6208885766767996043), - KQU(14982349522273141706), KQU( 3099654362410737822), - KQU(13776700761947297661), KQU( 8806185470684925550), - KQU( 8151717890410585321), KQU( 640860591588072925), - KQU(14592096303937307465), KQU( 9056472419613564846), - KQU(14861544647742266352), KQU(12703771500398470216), - KQU( 3142372800384138465), KQU( 6201105606917248196), - KQU(18337516409359270184), KQU(15042268695665115339), - KQU(15188246541383283846), KQU(12800028693090114519), - KQU( 5992859621101493472), KQU(18278043971816803521), - KQU( 9002773075219424560), KQU( 7325707116943598353), - KQU( 7930571931248040822), KQU( 5645275869617023448), - KQU( 7266107455295958487), KQU( 4363664528273524411), - 
KQU(14313875763787479809), KQU(17059695613553486802), - KQU( 9247761425889940932), KQU(13704726459237593128), - KQU( 2701312427328909832), KQU(17235532008287243115), - KQU(14093147761491729538), KQU( 6247352273768386516), - KQU( 8268710048153268415), KQU( 7985295214477182083), - KQU(15624495190888896807), KQU( 3772753430045262788), - KQU( 9133991620474991698), KQU( 5665791943316256028), - KQU( 7551996832462193473), KQU(13163729206798953877), - KQU( 9263532074153846374), KQU( 1015460703698618353), - KQU(17929874696989519390), KQU(18257884721466153847), - KQU(16271867543011222991), KQU( 3905971519021791941), - KQU(16814488397137052085), KQU( 1321197685504621613), - KQU( 2870359191894002181), KQU(14317282970323395450), - KQU(13663920845511074366), KQU( 2052463995796539594), - KQU(14126345686431444337), KQU( 1727572121947022534), - KQU(17793552254485594241), KQU( 6738857418849205750), - KQU( 1282987123157442952), KQU(16655480021581159251), - KQU( 6784587032080183866), KQU(14726758805359965162), - KQU( 7577995933961987349), KQU(12539609320311114036), - KQU(10789773033385439494), KQU( 8517001497411158227), - KQU(10075543932136339710), KQU(14838152340938811081), - KQU( 9560840631794044194), KQU(17445736541454117475), - KQU(10633026464336393186), KQU(15705729708242246293), - KQU( 1117517596891411098), KQU( 4305657943415886942), - KQU( 4948856840533979263), KQU(16071681989041789593), - KQU(13723031429272486527), KQU( 7639567622306509462), - KQU(12670424537483090390), KQU( 9715223453097197134), - KQU( 5457173389992686394), KQU( 289857129276135145), - KQU(17048610270521972512), KQU( 692768013309835485), - KQU(14823232360546632057), KQU(18218002361317895936), - KQU( 3281724260212650204), KQU(16453957266549513795), - KQU( 8592711109774511881), KQU( 929825123473369579), - KQU(15966784769764367791), KQU( 9627344291450607588), - KQU(10849555504977813287), KQU( 9234566913936339275), - KQU( 6413807690366911210), KQU(10862389016184219267), - KQU(13842504799335374048), KQU( 
1531994113376881174), - KQU( 2081314867544364459), KQU(16430628791616959932), - KQU( 8314714038654394368), KQU( 9155473892098431813), - KQU(12577843786670475704), KQU( 4399161106452401017), - KQU( 1668083091682623186), KQU( 1741383777203714216), - KQU( 2162597285417794374), KQU(15841980159165218736), - KQU( 1971354603551467079), KQU( 1206714764913205968), - KQU( 4790860439591272330), KQU(14699375615594055799), - KQU( 8374423871657449988), KQU(10950685736472937738), - KQU( 697344331343267176), KQU(10084998763118059810), - KQU(12897369539795983124), KQU(12351260292144383605), - KQU( 1268810970176811234), KQU( 7406287800414582768), - KQU( 516169557043807831), KQU( 5077568278710520380), - KQU( 3828791738309039304), KQU( 7721974069946943610), - KQU( 3534670260981096460), KQU( 4865792189600584891), - KQU(16892578493734337298), KQU( 9161499464278042590), - KQU(11976149624067055931), KQU(13219479887277343990), - KQU(14161556738111500680), KQU(14670715255011223056), - KQU( 4671205678403576558), KQU(12633022931454259781), - KQU(14821376219869187646), KQU( 751181776484317028), - KQU( 2192211308839047070), KQU(11787306362361245189), - KQU(10672375120744095707), KQU( 4601972328345244467), - KQU(15457217788831125879), KQU( 8464345256775460809), - KQU(10191938789487159478), KQU( 6184348739615197613), - KQU(11425436778806882100), KQU( 2739227089124319793), - KQU( 461464518456000551), KQU( 4689850170029177442), - KQU( 6120307814374078625), KQU(11153579230681708671), - KQU( 7891721473905347926), KQU(10281646937824872400), - KQU( 3026099648191332248), KQU( 8666750296953273818), - KQU(14978499698844363232), KQU(13303395102890132065), - KQU( 8182358205292864080), KQU(10560547713972971291), - KQU(11981635489418959093), KQU( 3134621354935288409), - KQU(11580681977404383968), KQU(14205530317404088650), - KQU( 5997789011854923157), KQU(13659151593432238041), - KQU(11664332114338865086), KQU( 7490351383220929386), - KQU( 7189290499881530378), KQU(15039262734271020220), - KQU( 
2057217285976980055), KQU( 555570804905355739), - KQU(11235311968348555110), KQU(13824557146269603217), - KQU(16906788840653099693), KQU( 7222878245455661677), - KQU( 5245139444332423756), KQU( 4723748462805674292), - KQU(12216509815698568612), KQU(17402362976648951187), - KQU(17389614836810366768), KQU( 4880936484146667711), - KQU( 9085007839292639880), KQU(13837353458498535449), - KQU(11914419854360366677), KQU(16595890135313864103), - KQU( 6313969847197627222), KQU(18296909792163910431), - KQU(10041780113382084042), KQU( 2499478551172884794), - KQU(11057894246241189489), KQU( 9742243032389068555), - KQU(12838934582673196228), KQU(13437023235248490367), - KQU(13372420669446163240), KQU( 6752564244716909224), - KQU( 7157333073400313737), KQU(12230281516370654308), - KQU( 1182884552219419117), KQU( 2955125381312499218), - KQU(10308827097079443249), KQU( 1337648572986534958), - KQU(16378788590020343939), KQU( 108619126514420935), - KQU( 3990981009621629188), KQU( 5460953070230946410), - KQU( 9703328329366531883), KQU(13166631489188077236), - KQU( 1104768831213675170), KQU( 3447930458553877908), - KQU( 8067172487769945676), KQU( 5445802098190775347), - KQU( 3244840981648973873), KQU(17314668322981950060), - KQU( 5006812527827763807), KQU(18158695070225526260), - KQU( 2824536478852417853), KQU(13974775809127519886), - KQU( 9814362769074067392), KQU(17276205156374862128), - KQU(11361680725379306967), KQU( 3422581970382012542), - KQU(11003189603753241266), KQU(11194292945277862261), - KQU( 6839623313908521348), KQU(11935326462707324634), - KQU( 1611456788685878444), KQU(13112620989475558907), - KQU( 517659108904450427), KQU(13558114318574407624), - KQU(15699089742731633077), KQU( 4988979278862685458), - KQU( 8111373583056521297), KQU( 3891258746615399627), - KQU( 8137298251469718086), KQU(12748663295624701649), - KQU( 4389835683495292062), KQU( 5775217872128831729), - KQU( 9462091896405534927), KQU( 8498124108820263989), - KQU( 8059131278842839525), 
KQU(10503167994254090892), - KQU(11613153541070396656), KQU(18069248738504647790), - KQU( 570657419109768508), KQU( 3950574167771159665), - KQU( 5514655599604313077), KQU( 2908460854428484165), - KQU(10777722615935663114), KQU(12007363304839279486), - KQU( 9800646187569484767), KQU( 8795423564889864287), - KQU(14257396680131028419), KQU( 6405465117315096498), - KQU( 7939411072208774878), KQU(17577572378528990006), - KQU(14785873806715994850), KQU(16770572680854747390), - KQU(18127549474419396481), KQU(11637013449455757750), - KQU(14371851933996761086), KQU( 3601181063650110280), - KQU( 4126442845019316144), KQU(10198287239244320669), - KQU(18000169628555379659), KQU(18392482400739978269), - KQU( 6219919037686919957), KQU( 3610085377719446052), - KQU( 2513925039981776336), KQU(16679413537926716955), - KQU(12903302131714909434), KQU( 5581145789762985009), - KQU(12325955044293303233), KQU(17216111180742141204), - KQU( 6321919595276545740), KQU( 3507521147216174501), - KQU( 9659194593319481840), KQU(11473976005975358326), - KQU(14742730101435987026), KQU( 492845897709954780), - KQU(16976371186162599676), KQU(17712703422837648655), - KQU( 9881254778587061697), KQU( 8413223156302299551), - KQU( 1563841828254089168), KQU( 9996032758786671975), - KQU( 138877700583772667), KQU(13003043368574995989), - KQU( 4390573668650456587), KQU( 8610287390568126755), - KQU(15126904974266642199), KQU( 6703637238986057662), - KQU( 2873075592956810157), KQU( 6035080933946049418), - KQU(13382846581202353014), KQU( 7303971031814642463), - KQU(18418024405307444267), KQU( 5847096731675404647), - KQU( 4035880699639842500), KQU(11525348625112218478), - KQU( 3041162365459574102), KQU( 2604734487727986558), - KQU(15526341771636983145), KQU(14556052310697370254), - KQU(12997787077930808155), KQU( 9601806501755554499), - KQU(11349677952521423389), KQU(14956777807644899350), - KQU(16559736957742852721), KQU(12360828274778140726), - KQU( 6685373272009662513), KQU(16932258748055324130), - 
KQU(15918051131954158508), KQU( 1692312913140790144), - KQU( 546653826801637367), KQU( 5341587076045986652), - KQU(14975057236342585662), KQU(12374976357340622412), - KQU(10328833995181940552), KQU(12831807101710443149), - KQU(10548514914382545716), KQU( 2217806727199715993), - KQU(12627067369242845138), KQU( 4598965364035438158), - KQU( 150923352751318171), KQU(14274109544442257283), - KQU( 4696661475093863031), KQU( 1505764114384654516), - KQU(10699185831891495147), KQU( 2392353847713620519), - KQU( 3652870166711788383), KQU( 8640653276221911108), - KQU( 3894077592275889704), KQU( 4918592872135964845), - KQU(16379121273281400789), KQU(12058465483591683656), - KQU(11250106829302924945), KQU( 1147537556296983005), - KQU( 6376342756004613268), KQU(14967128191709280506), - KQU(18007449949790627628), KQU( 9497178279316537841), - KQU( 7920174844809394893), KQU(10037752595255719907), - KQU(15875342784985217697), KQU(15311615921712850696), - KQU( 9552902652110992950), KQU(14054979450099721140), - KQU( 5998709773566417349), KQU(18027910339276320187), - KQU( 8223099053868585554), KQU( 7842270354824999767), - KQU( 4896315688770080292), KQU(12969320296569787895), - KQU( 2674321489185759961), KQU( 4053615936864718439), - KQU(11349775270588617578), KQU( 4743019256284553975), - KQU( 5602100217469723769), KQU(14398995691411527813), - KQU( 7412170493796825470), KQU( 836262406131744846), - KQU( 8231086633845153022), KQU( 5161377920438552287), - KQU( 8828731196169924949), KQU(16211142246465502680), - KQU( 3307990879253687818), KQU( 5193405406899782022), - KQU( 8510842117467566693), KQU( 6070955181022405365), - KQU(14482950231361409799), KQU(12585159371331138077), - KQU( 3511537678933588148), KQU( 2041849474531116417), - KQU(10944936685095345792), KQU(18303116923079107729), - KQU( 2720566371239725320), KQU( 4958672473562397622), - KQU( 3032326668253243412), KQU(13689418691726908338), - KQU( 1895205511728843996), KQU( 8146303515271990527), - KQU(16507343500056113480), KQU( 
473996939105902919), - KQU( 9897686885246881481), KQU(14606433762712790575), - KQU( 6732796251605566368), KQU( 1399778120855368916), - KQU( 935023885182833777), KQU(16066282816186753477), - KQU( 7291270991820612055), KQU(17530230393129853844), - KQU(10223493623477451366), KQU(15841725630495676683), - KQU(17379567246435515824), KQU( 8588251429375561971), - KQU(18339511210887206423), KQU(17349587430725976100), - KQU(12244876521394838088), KQU( 6382187714147161259), - KQU(12335807181848950831), KQU(16948885622305460665), - KQU(13755097796371520506), KQU(14806740373324947801), - KQU( 4828699633859287703), KQU( 8209879281452301604), - KQU(12435716669553736437), KQU(13970976859588452131), - KQU( 6233960842566773148), KQU(12507096267900505759), - KQU( 1198713114381279421), KQU(14989862731124149015), - KQU(15932189508707978949), KQU( 2526406641432708722), - KQU( 29187427817271982), KQU( 1499802773054556353), - KQU(10816638187021897173), KQU( 5436139270839738132), - KQU( 6659882287036010082), KQU( 2154048955317173697), - KQU(10887317019333757642), KQU(16281091802634424955), - KQU(10754549879915384901), KQU(10760611745769249815), - KQU( 2161505946972504002), KQU( 5243132808986265107), - KQU(10129852179873415416), KQU( 710339480008649081), - KQU( 7802129453068808528), KQU(17967213567178907213), - KQU(15730859124668605599), KQU(13058356168962376502), - KQU( 3701224985413645909), KQU(14464065869149109264), - KQU( 9959272418844311646), KQU(10157426099515958752), - KQU(14013736814538268528), KQU(17797456992065653951), - KQU(17418878140257344806), KQU(15457429073540561521), - KQU( 2184426881360949378), KQU( 2062193041154712416), - KQU( 8553463347406931661), KQU( 4913057625202871854), - KQU( 2668943682126618425), KQU(17064444737891172288), - KQU( 4997115903913298637), KQU(12019402608892327416), - KQU(17603584559765897352), KQU(11367529582073647975), - KQU( 8211476043518436050), KQU( 8676849804070323674), - KQU(18431829230394475730), KQU(10490177861361247904), - KQU( 
9508720602025651349), KQU( 7409627448555722700), - KQU( 5804047018862729008), KQU(11943858176893142594), - KQU(11908095418933847092), KQU( 5415449345715887652), - KQU( 1554022699166156407), KQU( 9073322106406017161), - KQU( 7080630967969047082), KQU(18049736940860732943), - KQU(12748714242594196794), KQU( 1226992415735156741), - KQU(17900981019609531193), KQU(11720739744008710999), - KQU( 3006400683394775434), KQU(11347974011751996028), - KQU( 3316999628257954608), KQU( 8384484563557639101), - KQU(18117794685961729767), KQU( 1900145025596618194), - KQU(17459527840632892676), KQU( 5634784101865710994), - KQU( 7918619300292897158), KQU( 3146577625026301350), - KQU( 9955212856499068767), KQU( 1873995843681746975), - KQU( 1561487759967972194), KQU( 8322718804375878474), - KQU(11300284215327028366), KQU( 4667391032508998982), - KQU( 9820104494306625580), KQU(17922397968599970610), - KQU( 1784690461886786712), KQU(14940365084341346821), - KQU( 5348719575594186181), KQU(10720419084507855261), - KQU(14210394354145143274), KQU( 2426468692164000131), - KQU(16271062114607059202), KQU(14851904092357070247), - KQU( 6524493015693121897), KQU( 9825473835127138531), - KQU(14222500616268569578), KQU(15521484052007487468), - KQU(14462579404124614699), KQU(11012375590820665520), - KQU(11625327350536084927), KQU(14452017765243785417), - KQU( 9989342263518766305), KQU( 3640105471101803790), - KQU( 4749866455897513242), KQU(13963064946736312044), - KQU(10007416591973223791), KQU(18314132234717431115), - KQU( 3286596588617483450), KQU( 7726163455370818765), - KQU( 7575454721115379328), KQU( 5308331576437663422), - KQU(18288821894903530934), KQU( 8028405805410554106), - KQU(15744019832103296628), KQU( 149765559630932100), - KQU( 6137705557200071977), KQU(14513416315434803615), - KQU(11665702820128984473), KQU( 218926670505601386), - KQU( 6868675028717769519), KQU(15282016569441512302), - KQU( 5707000497782960236), KQU( 6671120586555079567), - KQU( 2194098052618985448), 
KQU(16849577895477330978), - KQU(12957148471017466283), KQU( 1997805535404859393), - KQU( 1180721060263860490), KQU(13206391310193756958), - KQU(12980208674461861797), KQU( 3825967775058875366), - KQU(17543433670782042631), KQU( 1518339070120322730), - KQU(16344584340890991669), KQU( 2611327165318529819), - KQU(11265022723283422529), KQU( 4001552800373196817), - KQU(14509595890079346161), KQU( 3528717165416234562), - KQU(18153222571501914072), KQU( 9387182977209744425), - KQU(10064342315985580021), KQU(11373678413215253977), - KQU( 2308457853228798099), KQU( 9729042942839545302), - KQU( 7833785471140127746), KQU( 6351049900319844436), - KQU(14454610627133496067), KQU(12533175683634819111), - KQU(15570163926716513029), KQU(13356980519185762498) -}; +static const uint32_t init_gen_rand_32_expected[] = {3440181298U, 1564997079U, + 1510669302U, 2930277156U, 1452439940U, 3796268453U, 423124208U, 2143818589U, + 3827219408U, 2987036003U, 2674978610U, 1536842514U, 2027035537U, + 2534897563U, 1686527725U, 545368292U, 1489013321U, 1370534252U, 4231012796U, + 3994803019U, 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U, + 359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, 3206507879U, + 2378925033U, 1040214787U, 2524778605U, 3088428700U, 1417665896U, 964324147U, + 2282797708U, 2456269299U, 313400376U, 2245093271U, 1015729427U, 2694465011U, + 3246975184U, 1992793635U, 463679346U, 3721104591U, 3475064196U, 856141236U, + 1499559719U, 3522818941U, 3721533109U, 1954826617U, 1282044024U, + 1543279136U, 1301863085U, 2669145051U, 4221477354U, 3896016841U, + 3392740262U, 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U, + 1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, 157593879U, + 1136901695U, 4038377686U, 3572517236U, 4231706728U, 2997311961U, + 1189931652U, 3981543765U, 2826166703U, 87159245U, 1721379072U, 3897926942U, + 1790395498U, 2569178939U, 1047368729U, 2340259131U, 3144212906U, + 2301169789U, 2442885464U, 3034046771U, 
3667880593U, 3935928400U, + 2372805237U, 1666397115U, 2460584504U, 513866770U, 3810869743U, 2147400037U, + 2792078025U, 2941761810U, 3212265810U, 984692259U, 346590253U, 1804179199U, + 3298543443U, 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U, + 2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U, + 2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, 1202933193U, + 4087643167U, 2590021067U, 2256240196U, 1746697293U, 1013913783U, + 1155864921U, 2715773730U, 915061862U, 1948766573U, 2322882854U, 3761119102U, + 1343405684U, 3078711943U, 3067431651U, 3245156316U, 3588354584U, + 3484623306U, 3899621563U, 4156689741U, 3237090058U, 3880063844U, 862416318U, + 4039923869U, 2303788317U, 3073590536U, 701653667U, 2131530884U, 3169309950U, + 2028486980U, 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U, + 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, 1104864179U, + 342430307U, 1350510923U, 3024656237U, 1028417492U, 2870772950U, 290847558U, + 3675663500U, 508431529U, 4264340390U, 2263569913U, 1669302976U, 519511383U, + 2706411211U, 3764615828U, 3883162495U, 4051445305U, 2412729798U, + 3299405164U, 3991911166U, 2348767304U, 2664054906U, 3763609282U, 593943581U, + 3757090046U, 2075338894U, 2020550814U, 4287452920U, 4290140003U, + 1422957317U, 2512716667U, 2003485045U, 2307520103U, 2288472169U, + 3940751663U, 4204638664U, 2892583423U, 1710068300U, 3904755993U, + 2363243951U, 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U, + 2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, 3083335297U, + 26885281U, 3932155283U, 1531751116U, 1425227133U, 495654159U, 3279634176U, + 3855562207U, 3957195338U, 4159985527U, 893375062U, 1875515536U, 1327247422U, + 3754140693U, 1028923197U, 1729880440U, 805571298U, 448971099U, 2726757106U, + 2749436461U, 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U, + 943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, 3893463573U, + 3977583081U, 
2636504348U, 1110673525U, 3548479841U, 4258854744U, 980047703U, + 4057175418U, 3890008292U, 145653646U, 3141868989U, 3293216228U, 1194331837U, + 1254570642U, 3049934521U, 2868313360U, 2886032750U, 1110873820U, 279553524U, + 3007258565U, 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U, + 3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, 444526410U, + 779157624U, 1088229627U, 1092460223U, 1856013765U, 3659877367U, 368270451U, + 503570716U, 3000984671U, 2742789647U, 928097709U, 2914109539U, 308843566U, + 2816161253U, 3667192079U, 2762679057U, 3395240989U, 2928925038U, + 1491465914U, 3458702834U, 3787782576U, 2894104823U, 1296880455U, + 1253636503U, 989959407U, 2291560361U, 2776790436U, 1913178042U, 1584677829U, + 689637520U, 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U, + 2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, 1080720688U, + 3938032556U, 387896427U, 2650839632U, 99042991U, 1720913794U, 1047186003U, + 1877048040U, 2090457659U, 517087501U, 4172014665U, 2129713163U, 2413533132U, + 2760285054U, 4129272496U, 1317737175U, 2309566414U, 2228873332U, + 3889671280U, 1110864630U, 3576797776U, 2074552772U, 832002644U, 3097122623U, + 2464859298U, 2679603822U, 1667489885U, 3237652716U, 1478413938U, + 1719340335U, 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U, + 1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, 841660320U, + 3974501451U, 3360949056U, 1676829340U, 728899254U, 2047809627U, 2390948962U, + 670165943U, 3412951831U, 4189320049U, 1911595255U, 2055363086U, 507170575U, + 418219594U, 4141495280U, 2692088692U, 4203630654U, 3540093932U, 791986533U, + 2237921051U, 2526864324U, 2956616642U, 1394958700U, 1983768223U, + 1893373266U, 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U, + 2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, 1123578029U, + 1804276347U, 997971319U, 4203797076U, 4185199713U, 2811733626U, 2343642194U, + 2985262313U, 1417930827U, 
3759587724U, 1967077982U, 1585223204U, + 1097475516U, 1903944948U, 740382444U, 1114142065U, 1541796065U, 1718384172U, + 1544076191U, 1134682254U, 3519754455U, 2866243923U, 341865437U, 645498576U, + 2690735853U, 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U, + 3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U, + 3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, 1297726519U, + 219544855U, 4270285558U, 437578827U, 1444698679U, 2258519491U, 963109892U, + 3982244073U, 3351535275U, 385328496U, 1804784013U, 698059346U, 3920535147U, + 708331212U, 784338163U, 785678147U, 1238376158U, 1557298846U, 2037809321U, + 271576218U, 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U, + 3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, 2963500837U, + 2253971215U, 3153642623U, 1066925709U, 2582781958U, 3034720222U, + 1090798544U, 2942170004U, 4036187520U, 686972531U, 2610990302U, 2641437026U, + 1837562420U, 722096247U, 1315333033U, 2102231203U, 3402389208U, 3403698140U, + 1312402831U, 2898426558U, 814384596U, 385649582U, 1916643285U, 1924625106U, + 2512905582U, 2501170304U, 4275223366U, 2841225246U, 1467663688U, + 3563567847U, 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U, + 3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U, + 1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U, + 1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, 3752392312U, + 1287475550U, 3770904171U, 3004244617U, 1502117784U, 918698423U, 2419857538U, + 3864502062U, 1751322107U, 2188775056U, 4018728324U, 983712955U, 440071928U, + 3710838677U, 2001027698U, 3994702151U, 22493119U, 3584400918U, 3446253670U, + 4254789085U, 1405447860U, 1240245579U, 1800644159U, 1661363424U, + 3278326132U, 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U, + 3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, 1678544474U, + 3148435887U, 3457217359U, 1193226330U, 2816576908U, 154025329U, 
121678860U, + 1164915738U, 973873761U, 269116100U, 52087970U, 744015362U, 498556057U, + 94298882U, 1563271621U, 2383059628U, 4197367290U, 3958472990U, 2592083636U, + 2906408439U, 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U, + 4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U, + 3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, 226777499U, + 2496151295U, 2207301712U, 3283683112U, 611630281U, 1933218215U, 3315610954U, + 3889441987U, 3719454256U, 3957190521U, 1313998161U, 2365383016U, + 3146941060U, 1801206260U, 796124080U, 2076248581U, 1747472464U, 3254365145U, + 595543130U, 3573909503U, 3758250204U, 2020768540U, 2439254210U, 93368951U, + 3155792250U, 2600232980U, 3709198295U, 3894900440U, 2971850836U, + 1578909644U, 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U, + 1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, 1001330373U, + 3561331654U, 2259301289U, 1564977624U, 3835077093U, 727244906U, 4255738067U, + 1214133513U, 2570786021U, 3899704621U, 1633861986U, 1636979509U, + 1438500431U, 58463278U, 2823485629U, 2297430187U, 2926781924U, 3371352948U, + 1864009023U, 2722267973U, 1444292075U, 437703973U, 1060414512U, 189705863U, + 910018135U, 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U, + 2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, 2978462572U, + 2176222820U, 829424696U, 2790788332U, 2750819108U, 1594611657U, 3899878394U, + 3032870364U, 1702887682U, 1948167778U, 14130042U, 192292500U, 947227076U, + 90719497U, 3854230320U, 784028434U, 2142399787U, 1563449646U, 2844400217U, + 819143172U, 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U, + 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, 1412424497U, + 2981395985U, 1418359660U, 2925902456U, 52752784U, 3713667988U, 3924669405U, + 648975707U, 1145520213U, 4018650664U, 3805915440U, 2380542088U, 2013260958U, + 3262572197U, 2465078101U, 1114540067U, 3728768081U, 2396958768U, 590672271U, + 
904818725U, 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U, + 2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, 1142661026U, + 2889931380U, 14316674U, 2201179167U, 415289459U, 448265759U, 3515142743U, + 3254903683U, 246633281U, 1184307224U, 2418347830U, 2092967314U, 2682072314U, + 2558750234U, 2000352263U, 1544150531U, 399010405U, 1513946097U, 499682937U, + 461167460U, 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U, + 635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, 349178792U, + 226482567U, 3102426060U, 3575998268U, 2103001871U, 3243137071U, 225500688U, + 1634718593U, 4283311431U, 4292122923U, 3842802787U, 811735523U, 105712518U, + 663434053U, 1855889273U, 2847972595U, 1196355421U, 2552150115U, 4254510614U, + 3752181265U, 3430721819U, 3828705396U, 3436287905U, 3441964937U, + 4123670631U, 353001539U, 459496439U, 3799690868U, 1293777660U, 2761079737U, + 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, 4134660419U, + 3903444024U, 3576494993U, 203682175U, 3321164857U, 2747963611U, 79749085U, + 2992890370U, 1240278549U, 1772175713U, 2111331972U, 2655023449U, + 1683896345U, 2836027212U, 3482868021U, 2489884874U, 756853961U, 2298874501U, + 4013448667U, 4143996022U, 2948306858U, 4132920035U, 1283299272U, 995592228U, + 3450508595U, 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U, + 3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, 2050079547U, + 3198903947U, 3105589778U, 4066481316U, 3026383978U, 2276901713U, 365637751U, + 2260718426U, 1394775634U, 1791172338U, 2690503163U, 2952737846U, + 1568710462U, 732623190U, 2980358000U, 1053631832U, 1432426951U, 3229149635U, + 1854113985U, 3719733532U, 3204031934U, 735775531U, 107468620U, 3734611984U, + 631009402U, 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U, + 1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, 1381727755U, + 405608287U, 4287919625U, 1703554290U, 3589580244U, 2911403488U, 2166565U, + 
2647306451U, 2330535117U, 1200815358U, 1165916754U, 245060911U, 4040679071U, + 3684908771U, 2452834126U, 2486872773U, 2318678365U, 2940627908U, + 1837837240U, 3447897409U, 4270484676U, 1495388728U, 3754288477U, + 4204167884U, 1386977705U, 2692224733U, 3076249689U, 4109568048U, + 4170955115U, 4167531356U, 4020189950U, 4261855038U, 3036907575U, + 3410399885U, 3076395737U, 1046178638U, 144496770U, 230725846U, 3349637149U, + 17065717U, 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U, + 3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, 3120404710U, + 254684547U, 2653661580U, 3663904795U, 2631942758U, 1063234347U, 2609732900U, + 2332080715U, 3521125233U, 1180599599U, 1935868586U, 4110970440U, 296706371U, + 2128666368U, 1319875791U, 1570900197U, 3096025483U, 1799882517U, + 1928302007U, 1163707758U, 1244491489U, 3533770203U, 567496053U, 2757924305U, + 2781639343U, 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U, + 2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, 330725126U, + 367400677U, 888239854U, 545570454U, 4259590525U, 134343617U, 1102169784U, + 1647463719U, 3260979784U, 1518840883U, 3631537963U, 3342671457U, + 1301549147U, 2083739356U, 146593792U, 3217959080U, 652755743U, 2032187193U, + 3898758414U, 1021358093U, 4037409230U, 2176407931U, 3427391950U, + 2883553603U, 985613827U, 3105265092U, 3423168427U, 3387507672U, 467170288U, + 2141266163U, 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U, + 3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, 639683232U, + 2639569327U, 1218546948U, 4263586685U, 3058215773U, 2352279820U, 401870217U, + 2625822463U, 1529125296U, 2981801895U, 1191285226U, 4027725437U, + 3432700217U, 4098835661U, 971182783U, 2443861173U, 3881457123U, 3874386651U, + 457276199U, 2638294160U, 4002809368U, 421169044U, 1112642589U, 3076213779U, + 3387033971U, 2499610950U, 3057240914U, 1662679783U, 461224431U, + 1168395933U}; +static const uint32_t init_by_array_32_expected[] 
= {2920711183U, 3885745737U, + 3501893680U, 856470934U, 1421864068U, 277361036U, 1518638004U, 2328404353U, + 3355513634U, 64329189U, 1624587673U, 3508467182U, 2481792141U, 3706480799U, + 1925859037U, 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U, + 2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U, + 4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, 121399709U, + 3170839019U, 4044347501U, 953953814U, 3821710850U, 3085591323U, 3666535579U, + 3577837737U, 2012008410U, 3565417471U, 4044408017U, 433600965U, 1637785608U, + 1798509764U, 860770589U, 3081466273U, 3982393409U, 2451928325U, 3437124742U, + 4093828739U, 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U, + 3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, 4078331588U, + 3706103141U, 170391138U, 3806085154U, 1680970100U, 1961637521U, 3316029766U, + 890610272U, 1453751581U, 1430283664U, 3051057411U, 3597003186U, 542563954U, + 3796490244U, 1690016688U, 3448752238U, 440702173U, 347290497U, 1121336647U, + 2540588620U, 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U, + 659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, 2315569495U, + 2729518615U, 564745877U, 1263517638U, 3157185798U, 1604852056U, 1011639885U, + 2950579535U, 2524219188U, 312951012U, 1528896652U, 1327861054U, 2846910138U, + 3966855905U, 2536721582U, 855353911U, 1685434729U, 3303978929U, 1624872055U, + 4020329649U, 3164802143U, 1642802700U, 1957727869U, 1792352426U, + 3334618929U, 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U, + 1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U, + 2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U, + 1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, 2590843588U, + 3207422808U, 3275066464U, 561592872U, 3957205738U, 3396578098U, 48410678U, + 3505556445U, 1005764855U, 3920606528U, 2936980473U, 2378918600U, + 2404449845U, 1649515163U, 701203563U, 3705256349U, 83714199U, 
3586854132U, + 922978446U, 2863406304U, 3523398907U, 2606864832U, 2385399361U, 3171757816U, + 4262841009U, 3645837721U, 1169579486U, 3666433897U, 3174689479U, + 1457866976U, 3803895110U, 3346639145U, 1907224409U, 1978473712U, + 1036712794U, 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U, + 1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, 1597354672U, + 3349636117U, 2357291114U, 3995796221U, 945364213U, 1893326518U, 3770814016U, + 1691552714U, 2397527410U, 967486361U, 776416472U, 4197661421U, 951150819U, + 1852770983U, 4044624181U, 1399439738U, 4194455275U, 2284037669U, + 1550734958U, 3321078108U, 1865235926U, 2912129961U, 2664980877U, + 1357572033U, 2600196436U, 2486728200U, 2372668724U, 1567316966U, + 2374111491U, 1839843570U, 20815612U, 3727008608U, 3871996229U, 824061249U, + 1932503978U, 3404541726U, 758428924U, 2609331364U, 1223966026U, 1299179808U, + 648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, 1032413764U, + 4185884695U, 2490396037U, 1201932817U, 4060951446U, 4165586898U, + 1629813212U, 2887821158U, 415045333U, 628926856U, 2193466079U, 3391843445U, + 2227540681U, 1907099846U, 2848448395U, 1717828221U, 1372704537U, + 1707549841U, 2294058813U, 2101214437U, 2052479531U, 1695809164U, + 3176587306U, 2632770465U, 81634404U, 1603220563U, 644238487U, 302857763U, + 897352968U, 2613146653U, 1391730149U, 4245717312U, 4191828749U, 1948492526U, + 2618174230U, 3992984522U, 2178852787U, 3596044509U, 3445573503U, + 2026614616U, 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U, + 3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, 3004150659U, + 2135025926U, 948690501U, 2799119116U, 4228829406U, 1981197489U, 4209064138U, + 684318751U, 3459397845U, 201790843U, 4022541136U, 3043635877U, 492509624U, + 3263466772U, 1509148086U, 921459029U, 3198857146U, 705479721U, 3835966910U, + 3603356465U, 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U, + 449571793U, 3246390646U, 3868232151U, 1479156585U, 
2900125656U, 2464815318U, + 3960178104U, 1784261920U, 18311476U, 3627135050U, 644609697U, 424968996U, + 919890700U, 2986824110U, 816423214U, 4003562844U, 1392714305U, 1757384428U, + 2569030598U, 995949559U, 3875659880U, 2933807823U, 2752536860U, 2993858466U, + 4030558899U, 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U, + 3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, 3578063323U, + 3242082049U, 1778193530U, 27981909U, 2362826515U, 389875677U, 1043878156U, + 581653903U, 3830568952U, 389535942U, 3713523185U, 2768373359U, 2526101582U, + 1998618197U, 1160859704U, 3951172488U, 1098005003U, 906275699U, 3446228002U, + 2220677963U, 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U, + 857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, 4288137521U, + 819087232U, 596301494U, 872823172U, 1526888217U, 805161465U, 1116186205U, + 2829002754U, 2352620120U, 620121516U, 354159268U, 3601949785U, 209568138U, + 1352371732U, 2145977349U, 4236871834U, 1539414078U, 3558126206U, + 3224857093U, 4164166682U, 3817553440U, 3301780278U, 2682696837U, + 3734994768U, 1370950260U, 1477421202U, 2521315749U, 1330148125U, + 1261554731U, 2769143688U, 3554756293U, 4235882678U, 3254686059U, + 3530579953U, 1215452615U, 3574970923U, 4057131421U, 589224178U, 1000098193U, + 171190718U, 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U, + 3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U, + 2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U, + 1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U, + 2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, 81165449U, + 612438025U, 3912966678U, 1356929810U, 733545735U, 537003843U, 1282953084U, + 884458241U, 588930090U, 3930269801U, 2961472450U, 1219535534U, 3632251943U, + 268183903U, 1441240533U, 3653903360U, 3854473319U, 2259087390U, 2548293048U, + 2022641195U, 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U, + 2628314765U, 
2466219854U, 717546004U, 2322237039U, 416725234U, 1544049923U, + 1797944973U, 3398652364U, 3111909456U, 485742908U, 2277491072U, 1056355088U, + 3181001278U, 129695079U, 2693624550U, 1764438564U, 3797785470U, 195503713U, + 3266519725U, 2053389444U, 1961527818U, 3400226523U, 3777903038U, + 2597274307U, 4235851091U, 4094406648U, 2171410785U, 1781151386U, + 1378577117U, 654643266U, 3424024173U, 3385813322U, 679385799U, 479380913U, + 681715441U, 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U, + 3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U, + 1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, 3972652090U, + 679881088U, 40577009U, 3705286397U, 2815423480U, 3566262429U, 663396513U, + 3777887429U, 4016670678U, 404539370U, 1142712925U, 1140173408U, 2913248352U, + 2872321286U, 263751841U, 3175196073U, 3162557581U, 2878996619U, 75498548U, + 3836833140U, 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U, + 1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, 1767889440U, + 4264698824U, 1582999313U, 884471997U, 2508825098U, 3756370771U, 2457213553U, + 3565776881U, 3709583214U, 915609601U, 460833524U, 1091049576U, 85522880U, + 2553251U, 132102809U, 2429882442U, 2562084610U, 1386507633U, 4112471229U, + 21965213U, 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U, + 1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U, + 3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, 1596096923U, + 610150600U, 431464457U, 2541325046U, 486478003U, 739704936U, 2862696430U, + 3037903166U, 1129749694U, 2611481261U, 1228993498U, 510075548U, 3424962587U, + 2458689681U, 818934833U, 4233309125U, 1608196251U, 3419476016U, 1858543939U, + 2682166524U, 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U, + 3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, 565267881U, + 768644821U, 198310105U, 2396688616U, 1837659011U, 203429334U, 854539004U, + 4235811518U, 3338304926U, 
3730418692U, 3852254981U, 3032046452U, + 2329811860U, 2303590566U, 2696092212U, 3894665932U, 145835667U, 249563655U, + 1932210840U, 2431696407U, 3312636759U, 214962629U, 2092026914U, 3020145527U, + 4073039873U, 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U, + 547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, 3888005760U, + 696099141U, 397343236U, 1864511780U, 44029739U, 1729526891U, 1993398655U, + 2010173426U, 2591546756U, 275223291U, 1503900299U, 4217765081U, 2185635252U, + 1122436015U, 3550155364U, 681707194U, 3260479338U, 933579397U, 2983029282U, + 2505504587U, 2667410393U, 2962684490U, 4139721708U, 2658172284U, + 2452602383U, 2607631612U, 1344296217U, 3075398709U, 2949785295U, + 1049956168U, 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U, + 1918468193U, 3843983676U, 634358221U, 2538335643U, 1873351298U, 3368723763U, + 2129144130U, 3203528633U, 3087174986U, 2691698871U, 2516284287U, 24437745U, + 1118381474U, 2816314867U, 2448576035U, 4281989654U, 217287825U, 165872888U, + 2628995722U, 3533525116U, 2721669106U, 872340568U, 3429930655U, 3309047304U, + 3916704967U, 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U, + 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, 3853082619U, + 4073196549U, 1189620777U, 637238656U, 930241537U, 4042750792U, 3842136042U, + 2417007212U, 2524907510U, 1243036827U, 1282059441U, 3764588774U, + 1394459615U, 2323620015U, 1166152231U, 3307479609U, 3849322257U, + 3507445699U, 4247696636U, 758393720U, 967665141U, 1095244571U, 1319812152U, + 407678762U, 2640605208U, 2170766134U, 3663594275U, 4039329364U, 2512175520U, + 725523154U, 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U, + 1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, 226175489U, + 2961367263U, 1968719665U, 42656370U, 1010790699U, 561600615U, 2422453992U, + 3082197735U, 1636700484U, 3977715296U, 3125350482U, 3478021514U, + 2227819446U, 1540868045U, 3061908980U, 1087362407U, 
3625200291U, 361937537U, + 580441897U, 1520043666U, 2270875402U, 1009161260U, 2502355842U, 4278769785U, + 473902412U, 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U, + 1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, 674112918U, + 195425752U, 3917890095U, 1874364234U, 1837892715U, 3663478166U, 1548892014U, + 2570748714U, 2049929836U, 2167029704U, 697543767U, 3499545023U, 3342496315U, + 1725251190U, 3561387469U, 2905606616U, 1580182447U, 3934525927U, + 4103172792U, 1365672522U, 1534795737U, 3308667416U, 2841911405U, + 3943182730U, 4072020313U, 3494770452U, 3332626671U, 55327267U, 478030603U, + 411080625U, 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U, + 2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, 202895590U, + 1609122645U, 1267651008U, 2910315509U, 2511475445U, 2477423819U, + 3932081579U, 900879979U, 2145588390U, 2670007504U, 580819444U, 1864996828U, + 2526325979U, 1019124258U, 815508628U, 2765933989U, 1277301341U, 3006021786U, + 855540956U, 288025710U, 1919594237U, 2331223864U, 177452412U, 2475870369U, + 2689291749U, 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U, + 1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U, + 2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, 1678115244U, + 2699839832U, 3651968520U, 3521595541U, 458433303U, 2423096824U, 21831741U, + 380011703U, 2498168716U, 861806087U, 1673574843U, 4188794405U, 2520563651U, + 2632279153U, 2170465525U, 4171949898U, 3886039621U, 1661344005U, + 3424285243U, 992588372U, 2500984144U, 2993248497U, 3590193895U, 1535327365U, + 515645636U, 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U, + 1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, 2501401703U, + 4285518317U, 3794656178U, 955526526U, 3442142820U, 3970298374U, 736025417U, + 2737370764U, 1271509744U, 440570731U, 136141826U, 1596189518U, 923399175U, + 257541519U, 3505774281U, 2194358432U, 2518162991U, 1379893637U, 2667767062U, + 
3748146247U, 1821712620U, 3923161384U, 1947811444U, 2392527197U, + 4127419685U, 1423694998U, 4156576871U, 1382885582U, 3420127279U, + 3617499534U, 2994377493U, 4038063986U, 1918458672U, 2983166794U, + 4200449033U, 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U, + 2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, 2491531140U, + 4159725633U, 2272075455U, 759298618U, 201384554U, 838356250U, 1416268324U, + 674476934U, 90795364U, 141672229U, 3660399588U, 4196417251U, 3249270244U, + 3774530247U, 59587265U, 3683164208U, 19392575U, 1463123697U, 1882205379U, + 293780489U, 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U, + 2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, 2987802540U, + 641492617U, 2575442710U, 4217822703U, 3271835300U, 2836418300U, 3739921620U, + 2138378768U, 2879771855U, 4294903423U, 3121097946U, 2603440486U, + 2560820391U, 1012930944U, 2313499967U, 584489368U, 3431165766U, 897384869U, + 2062537737U, 2847889234U, 3742362450U, 2951174585U, 4204621084U, + 1109373893U, 3668075775U, 2750138839U, 3518055702U, 733072558U, 4169325400U, + 788493625U}; +static const uint64_t init_gen_rand_64_expected[] = {KQU(16924766246869039260), + KQU(8201438687333352714), KQU(2265290287015001750), + KQU(18397264611805473832), KQU(3375255223302384358), + KQU(6345559975416828796), KQU(18229739242790328073), + KQU(7596792742098800905), KQU(255338647169685981), KQU(2052747240048610300), + KQU(18328151576097299343), KQU(12472905421133796567), + KQU(11315245349717600863), KQU(16594110197775871209), + KQU(15708751964632456450), KQU(10452031272054632535), + KQU(11097646720811454386), KQU(4556090668445745441), + KQU(17116187693090663106), KQU(14931526836144510645), + KQU(9190752218020552591), KQU(9625800285771901401), + KQU(13995141077659972832), KQU(5194209094927829625), + KQU(4156788379151063303), KQU(8523452593770139494), + KQU(14082382103049296727), KQU(2462601863986088483), + KQU(3030583461592840678), KQU(5221622077872827681), 
+ KQU(3084210671228981236), KQU(13956758381389953823), + KQU(13503889856213423831), KQU(15696904024189836170), + KQU(4612584152877036206), KQU(6231135538447867881), + KQU(10172457294158869468), KQU(6452258628466708150), + KQU(14044432824917330221), KQU(370168364480044279), + KQU(10102144686427193359), KQU(667870489994776076), + KQU(2732271956925885858), KQU(18027788905977284151), + KQU(15009842788582923859), KQU(7136357960180199542), + KQU(15901736243475578127), KQU(16951293785352615701), + KQU(10551492125243691632), KQU(17668869969146434804), + KQU(13646002971174390445), KQU(9804471050759613248), + KQU(5511670439655935493), KQU(18103342091070400926), + KQU(17224512747665137533), KQU(15534627482992618168), + KQU(1423813266186582647), KQU(15821176807932930024), KQU(30323369733607156), + KQU(11599382494723479403), KQU(653856076586810062), + KQU(3176437395144899659), KQU(14028076268147963917), + KQU(16156398271809666195), KQU(3166955484848201676), + KQU(5746805620136919390), KQU(17297845208891256593), + KQU(11691653183226428483), KQU(17900026146506981577), + KQU(15387382115755971042), KQU(16923567681040845943), + KQU(8039057517199388606), KQU(11748409241468629263), + KQU(794358245539076095), KQU(13438501964693401242), + KQU(14036803236515618962), KQU(5252311215205424721), + KQU(17806589612915509081), KQU(6802767092397596006), + KQU(14212120431184557140), KQU(1072951366761385712), + KQU(13098491780722836296), KQU(9466676828710797353), + KQU(12673056849042830081), KQU(12763726623645357580), + KQU(16468961652999309493), KQU(15305979875636438926), + KQU(17444713151223449734), KQU(5692214267627883674), + KQU(13049589139196151505), KQU(880115207831670745), + KQU(1776529075789695498), KQU(16695225897801466485), + KQU(10666901778795346845), KQU(6164389346722833869), + KQU(2863817793264300475), KQU(9464049921886304754), + KQU(3993566636740015468), KQU(9983749692528514136), + KQU(16375286075057755211), KQU(16042643417005440820), + KQU(11445419662923489877), 
KQU(7999038846885158836), + KQU(6721913661721511535), KQU(5363052654139357320), + KQU(1817788761173584205), KQU(13290974386445856444), + KQU(4650350818937984680), KQU(8219183528102484836), + KQU(1569862923500819899), KQU(4189359732136641860), + KQU(14202822961683148583), KQU(4457498315309429058), + KQU(13089067387019074834), KQU(11075517153328927293), + KQU(10277016248336668389), KQU(7070509725324401122), + KQU(17808892017780289380), KQU(13143367339909287349), + KQU(1377743745360085151), KQU(5749341807421286485), + KQU(14832814616770931325), KQU(7688820635324359492), + KQU(10960474011539770045), KQU(81970066653179790), + KQU(12619476072607878022), KQU(4419566616271201744), + KQU(15147917311750568503), KQU(5549739182852706345), + KQU(7308198397975204770), KQU(13580425496671289278), + KQU(17070764785210130301), KQU(8202832846285604405), + KQU(6873046287640887249), KQU(6927424434308206114), + KQU(6139014645937224874), KQU(10290373645978487639), + KQU(15904261291701523804), KQU(9628743442057826883), + KQU(18383429096255546714), KQU(4977413265753686967), + KQU(7714317492425012869), KQU(9025232586309926193), + KQU(14627338359776709107), KQU(14759849896467790763), + KQU(10931129435864423252), KQU(4588456988775014359), + KQU(10699388531797056724), KQU(468652268869238792), + KQU(5755943035328078086), KQU(2102437379988580216), + KQU(9986312786506674028), KQU(2654207180040945604), + KQU(8726634790559960062), KQU(100497234871808137), KQU(2800137176951425819), + KQU(6076627612918553487), KQU(5780186919186152796), + KQU(8179183595769929098), KQU(6009426283716221169), + KQU(2796662551397449358), KQU(1756961367041986764), + KQU(6972897917355606205), KQU(14524774345368968243), + KQU(2773529684745706940), KQU(4853632376213075959), + KQU(4198177923731358102), KQU(8271224913084139776), + KQU(2741753121611092226), KQU(16782366145996731181), + KQU(15426125238972640790), KQU(13595497100671260342), + KQU(3173531022836259898), KQU(6573264560319511662), + KQU(18041111951511157441), 
KQU(2351433581833135952), + KQU(3113255578908173487), KQU(1739371330877858784), + KQU(16046126562789165480), KQU(8072101652214192925), + KQU(15267091584090664910), KQU(9309579200403648940), + KQU(5218892439752408722), KQU(14492477246004337115), + KQU(17431037586679770619), KQU(7385248135963250480), + KQU(9580144956565560660), KQU(4919546228040008720), + KQU(15261542469145035584), KQU(18233297270822253102), + KQU(5453248417992302857), KQU(9309519155931460285), + KQU(10342813012345291756), KQU(15676085186784762381), + KQU(15912092950691300645), KQU(9371053121499003195), + KQU(9897186478226866746), KQU(14061858287188196327), + KQU(122575971620788119), KQU(12146750969116317754), + KQU(4438317272813245201), KQU(8332576791009527119), + KQU(13907785691786542057), KQU(10374194887283287467), + KQU(2098798755649059566), KQU(3416235197748288894), + KQU(8688269957320773484), KQU(7503964602397371571), + KQU(16724977015147478236), KQU(9461512855439858184), + KQU(13259049744534534727), KQU(3583094952542899294), + KQU(8764245731305528292), KQU(13240823595462088985), + KQU(13716141617617910448), KQU(18114969519935960955), + KQU(2297553615798302206), KQU(4585521442944663362), + KQU(17776858680630198686), KQU(4685873229192163363), + KQU(152558080671135627), KQU(15424900540842670088), + KQU(13229630297130024108), KQU(17530268788245718717), + KQU(16675633913065714144), KQU(3158912717897568068), + KQU(15399132185380087288), KQU(7401418744515677872), + KQU(13135412922344398535), KQU(6385314346100509511), + KQU(13962867001134161139), KQU(10272780155442671999), + KQU(12894856086597769142), KQU(13340877795287554994), + KQU(12913630602094607396), KQU(12543167911119793857), + KQU(17343570372251873096), KQU(10959487764494150545), + KQU(6966737953093821128), KQU(13780699135496988601), + KQU(4405070719380142046), KQU(14923788365607284982), + KQU(2869487678905148380), KQU(6416272754197188403), + KQU(15017380475943612591), KQU(1995636220918429487), + KQU(3402016804620122716), 
KQU(15800188663407057080), + KQU(11362369990390932882), KQU(15262183501637986147), + KQU(10239175385387371494), KQU(9352042420365748334), + KQU(1682457034285119875), KQU(1724710651376289644), + KQU(2038157098893817966), KQU(9897825558324608773), + KQU(1477666236519164736), KQU(16835397314511233640), + KQU(10370866327005346508), KQU(10157504370660621982), + KQU(12113904045335882069), KQU(13326444439742783008), + KQU(11302769043000765804), KQU(13594979923955228484), + KQU(11779351762613475968), KQU(3786101619539298383), + KQU(8021122969180846063), KQU(15745904401162500495), + KQU(10762168465993897267), KQU(13552058957896319026), + KQU(11200228655252462013), KQU(5035370357337441226), + KQU(7593918984545500013), KQU(5418554918361528700), + KQU(4858270799405446371), KQU(9974659566876282544), + KQU(18227595922273957859), KQU(2772778443635656220), + KQU(14285143053182085385), KQU(9939700992429600469), + KQU(12756185904545598068), KQU(2020783375367345262), KQU(57026775058331227), + KQU(950827867930065454), KQU(6602279670145371217), KQU(2291171535443566929), + KQU(5832380724425010313), KQU(1220343904715982285), + KQU(17045542598598037633), KQU(15460481779702820971), + KQU(13948388779949365130), KQU(13975040175430829518), + KQU(17477538238425541763), KQU(11104663041851745725), + KQU(15860992957141157587), KQU(14529434633012950138), + KQU(2504838019075394203), KQU(7512113882611121886), + KQU(4859973559980886617), KQU(1258601555703250219), + KQU(15594548157514316394), KQU(4516730171963773048), + KQU(11380103193905031983), KQU(6809282239982353344), + KQU(18045256930420065002), KQU(2453702683108791859), + KQU(977214582986981460), KQU(2006410402232713466), KQU(6192236267216378358), + KQU(3429468402195675253), KQU(18146933153017348921), + KQU(17369978576367231139), KQU(1246940717230386603), + KQU(11335758870083327110), KQU(14166488801730353682), + KQU(9008573127269635732), KQU(10776025389820643815), + KQU(15087605441903942962), KQU(1359542462712147922), + 
KQU(13898874411226454206), KQU(17911176066536804411), + KQU(9435590428600085274), KQU(294488509967864007), KQU(8890111397567922046), + KQU(7987823476034328778), KQU(13263827582440967651), + KQU(7503774813106751573), KQU(14974747296185646837), + KQU(8504765037032103375), KQU(17340303357444536213), + KQU(7704610912964485743), KQU(8107533670327205061), + KQU(9062969835083315985), KQU(16968963142126734184), + KQU(12958041214190810180), KQU(2720170147759570200), + KQU(2986358963942189566), KQU(14884226322219356580), + KQU(286224325144368520), KQU(11313800433154279797), + KQU(18366849528439673248), KQU(17899725929482368789), + KQU(3730004284609106799), KQU(1654474302052767205), + KQU(5006698007047077032), KQU(8196893913601182838), + KQU(15214541774425211640), KQU(17391346045606626073), + KQU(8369003584076969089), KQU(3939046733368550293), + KQU(10178639720308707785), KQU(2180248669304388697), KQU(62894391300126322), + KQU(9205708961736223191), KQU(6837431058165360438), + KQU(3150743890848308214), KQU(17849330658111464583), + KQU(12214815643135450865), KQU(13410713840519603402), + KQU(3200778126692046802), KQU(13354780043041779313), + KQU(800850022756886036), KQU(15660052933953067433), + KQU(6572823544154375676), KQU(11030281857015819266), + KQU(12682241941471433835), KQU(11654136407300274693), + KQU(4517795492388641109), KQU(9757017371504524244), + KQU(17833043400781889277), KQU(12685085201747792227), + KQU(10408057728835019573), KQU(98370418513455221), KQU(6732663555696848598), + KQU(13248530959948529780), KQU(3530441401230622826), + KQU(18188251992895660615), KQU(1847918354186383756), + KQU(1127392190402660921), KQU(11293734643143819463), + KQU(3015506344578682982), KQU(13852645444071153329), + KQU(2121359659091349142), KQU(1294604376116677694), + KQU(5616576231286352318), KQU(7112502442954235625), + KQU(11676228199551561689), KQU(12925182803007305359), + KQU(7852375518160493082), KQU(1136513130539296154), + KQU(5636923900916593195), KQU(3221077517612607747), + 
KQU(17784790465798152513), KQU(3554210049056995938), + KQU(17476839685878225874), KQU(3206836372585575732), + KQU(2765333945644823430), KQU(10080070903718799528), + KQU(5412370818878286353), KQU(9689685887726257728), + KQU(8236117509123533998), KQU(1951139137165040214), + KQU(4492205209227980349), KQU(16541291230861602967), + KQU(1424371548301437940), KQU(9117562079669206794), + KQU(14374681563251691625), KQU(13873164030199921303), + KQU(6680317946770936731), KQU(15586334026918276214), + KQU(10896213950976109802), KQU(9506261949596413689), + KQU(9903949574308040616), KQU(6038397344557204470), KQU(174601465422373648), + KQU(15946141191338238030), KQU(17142225620992044937), + KQU(7552030283784477064), KQU(2947372384532947997), KQU(510797021688197711), + KQU(4962499439249363461), KQU(23770320158385357), KQU(959774499105138124), + KQU(1468396011518788276), KQU(2015698006852312308), + KQU(4149400718489980136), KQU(5992916099522371188), + KQU(10819182935265531076), KQU(16189787999192351131), + KQU(342833961790261950), KQU(12470830319550495336), + KQU(18128495041912812501), KQU(1193600899723524337), + KQU(9056793666590079770), KQU(2154021227041669041), + KQU(4963570213951235735), KQU(4865075960209211409), + KQU(2097724599039942963), KQU(2024080278583179845), + KQU(11527054549196576736), KQU(10650256084182390252), + KQU(4808408648695766755), KQU(1642839215013788844), + KQU(10607187948250398390), KQU(7076868166085913508), + KQU(730522571106887032), KQU(12500579240208524895), + KQU(4484390097311355324), KQU(15145801330700623870), + KQU(8055827661392944028), KQU(5865092976832712268), + KQU(15159212508053625143), KQU(3560964582876483341), + KQU(4070052741344438280), KQU(6032585709886855634), + KQU(15643262320904604873), KQU(2565119772293371111), + KQU(318314293065348260), KQU(15047458749141511872), + KQU(7772788389811528730), KQU(7081187494343801976), + KQU(6465136009467253947), KQU(10425940692543362069), + KQU(554608190318339115), KQU(14796699860302125214), + 
KQU(1638153134431111443), KQU(10336967447052276248), + KQU(8412308070396592958), KQU(4004557277152051226), + KQU(8143598997278774834), KQU(16413323996508783221), + KQU(13139418758033994949), KQU(9772709138335006667), + KQU(2818167159287157659), KQU(17091740573832523669), + KQU(14629199013130751608), KQU(18268322711500338185), + KQU(8290963415675493063), KQU(8830864907452542588), + KQU(1614839084637494849), KQU(14855358500870422231), + KQU(3472996748392519937), KQU(15317151166268877716), + KQU(5825895018698400362), KQU(16730208429367544129), + KQU(10481156578141202800), KQU(4746166512382823750), + KQU(12720876014472464998), KQU(8825177124486735972), + KQU(13733447296837467838), KQU(6412293741681359625), + KQU(8313213138756135033), KQU(11421481194803712517), + KQU(7997007691544174032), KQU(6812963847917605930), + KQU(9683091901227558641), KQU(14703594165860324713), + KQU(1775476144519618309), KQU(2724283288516469519), KQU(717642555185856868), + KQU(8736402192215092346), KQU(11878800336431381021), + KQU(4348816066017061293), KQU(6115112756583631307), + KQU(9176597239667142976), KQU(12615622714894259204), + KQU(10283406711301385987), KQU(5111762509485379420), + KQU(3118290051198688449), KQU(7345123071632232145), + KQU(9176423451688682359), KQU(4843865456157868971), + KQU(12008036363752566088), KQU(12058837181919397720), + KQU(2145073958457347366), KQU(1526504881672818067), + KQU(3488830105567134848), KQU(13208362960674805143), + KQU(4077549672899572192), KQU(7770995684693818365), + KQU(1398532341546313593), KQU(12711859908703927840), + KQU(1417561172594446813), KQU(17045191024194170604), + KQU(4101933177604931713), KQU(14708428834203480320), + KQU(17447509264469407724), KQU(14314821973983434255), + KQU(17990472271061617265), KQU(5087756685841673942), + KQU(12797820586893859939), KQU(1778128952671092879), + KQU(3535918530508665898), KQU(9035729701042481301), + KQU(14808661568277079962), KQU(14587345077537747914), + KQU(11920080002323122708), KQU(6426515805197278753), + 
KQU(3295612216725984831), KQU(11040722532100876120), + KQU(12305952936387598754), KQU(16097391899742004253), + KQU(4908537335606182208), KQU(12446674552196795504), + KQU(16010497855816895177), KQU(9194378874788615551), + KQU(3382957529567613384), KQU(5154647600754974077), + KQU(9801822865328396141), KQU(9023662173919288143), + KQU(17623115353825147868), KQU(8238115767443015816), + KQU(15811444159859002560), KQU(9085612528904059661), + KQU(6888601089398614254), KQU(258252992894160189), KQU(6704363880792428622), + KQU(6114966032147235763), KQU(11075393882690261875), + KQU(8797664238933620407), KQU(5901892006476726920), + KQU(5309780159285518958), KQU(14940808387240817367), + KQU(14642032021449656698), KQU(9808256672068504139), + KQU(3670135111380607658), KQU(11211211097845960152), + KQU(1474304506716695808), KQU(15843166204506876239), + KQU(7661051252471780561), KQU(10170905502249418476), + KQU(7801416045582028589), KQU(2763981484737053050), + KQU(9491377905499253054), KQU(16201395896336915095), + KQU(9256513756442782198), KQU(5411283157972456034), + KQU(5059433122288321676), KQU(4327408006721123357), + KQU(9278544078834433377), KQU(7601527110882281612), + KQU(11848295896975505251), KQU(12096998801094735560), + KQU(14773480339823506413), KQU(15586227433895802149), + KQU(12786541257830242872), KQU(6904692985140503067), + KQU(5309011515263103959), KQU(12105257191179371066), + KQU(14654380212442225037), KQU(2556774974190695009), + KQU(4461297399927600261), KQU(14888225660915118646), + KQU(14915459341148291824), KQU(2738802166252327631), + KQU(6047155789239131512), KQU(12920545353217010338), + KQU(10697617257007840205), KQU(2751585253158203504), + KQU(13252729159780047496), KQU(14700326134672815469), + KQU(14082527904374600529), KQU(16852962273496542070), + KQU(17446675504235853907), KQU(15019600398527572311), + KQU(12312781346344081551), KQU(14524667935039810450), + KQU(5634005663377195738), KQU(11375574739525000569), + KQU(2423665396433260040), 
KQU(5222836914796015410), + KQU(4397666386492647387), KQU(4619294441691707638), KQU(665088602354770716), + KQU(13246495665281593610), KQU(6564144270549729409), + KQU(10223216188145661688), KQU(3961556907299230585), + KQU(11543262515492439914), KQU(16118031437285993790), + KQU(7143417964520166465), KQU(13295053515909486772), KQU(40434666004899675), + KQU(17127804194038347164), KQU(8599165966560586269), + KQU(8214016749011284903), KQU(13725130352140465239), + KQU(5467254474431726291), KQU(7748584297438219877), + KQU(16933551114829772472), KQU(2169618439506799400), + KQU(2169787627665113463), KQU(17314493571267943764), + KQU(18053575102911354912), KQU(11928303275378476973), + KQU(11593850925061715550), KQU(17782269923473589362), + KQU(3280235307704747039), KQU(6145343578598685149), + KQU(17080117031114086090), KQU(18066839902983594755), + KQU(6517508430331020706), KQU(8092908893950411541), + KQU(12558378233386153732), KQU(4476532167973132976), + KQU(16081642430367025016), KQU(4233154094369139361), + KQU(8693630486693161027), KQU(11244959343027742285), + KQU(12273503967768513508), KQU(14108978636385284876), + KQU(7242414665378826984), KQU(6561316938846562432), + KQU(8601038474994665795), KQU(17532942353612365904), + KQU(17940076637020912186), KQU(7340260368823171304), + KQU(7061807613916067905), KQU(10561734935039519326), + KQU(17990796503724650862), KQU(6208732943911827159), + KQU(359077562804090617), KQU(14177751537784403113), + KQU(10659599444915362902), KQU(15081727220615085833), + KQU(13417573895659757486), KQU(15513842342017811524), + KQU(11814141516204288231), KQU(1827312513875101814), + KQU(2804611699894603103), KQU(17116500469975602763), + KQU(12270191815211952087), KQU(12256358467786024988), + KQU(18435021722453971267), KQU(671330264390865618), KQU(476504300460286050), + KQU(16465470901027093441), KQU(4047724406247136402), + KQU(1322305451411883346), KQU(1388308688834322280), + KQU(7303989085269758176), KQU(9323792664765233642), + KQU(4542762575316368936), 
KQU(17342696132794337618), + KQU(4588025054768498379), KQU(13415475057390330804), + KQU(17880279491733405570), KQU(10610553400618620353), + KQU(3180842072658960139), KQU(13002966655454270120), + KQU(1665301181064982826), KQU(7083673946791258979), KQU(190522247122496820), + KQU(17388280237250677740), KQU(8430770379923642945), + KQU(12987180971921668584), KQU(2311086108365390642), + KQU(2870984383579822345), KQU(14014682609164653318), + KQU(14467187293062251484), KQU(192186361147413298), + KQU(15171951713531796524), KQU(9900305495015948728), + KQU(17958004775615466344), KQU(14346380954498606514), + KQU(18040047357617407096), KQU(5035237584833424532), + KQU(15089555460613972287), KQU(4131411873749729831), + KQU(1329013581168250330), KQU(10095353333051193949), + KQU(10749518561022462716), KQU(9050611429810755847), + KQU(15022028840236655649), KQU(8775554279239748298), + KQU(13105754025489230502), KQU(15471300118574167585), + KQU(89864764002355628), KQU(8776416323420466637), KQU(5280258630612040891), + KQU(2719174488591862912), KQU(7599309137399661994), + KQU(15012887256778039979), KQU(14062981725630928925), + KQU(12038536286991689603), KQU(7089756544681775245), + KQU(10376661532744718039), KQU(1265198725901533130), + KQU(13807996727081142408), KQU(2935019626765036403), + KQU(7651672460680700141), KQU(3644093016200370795), + KQU(2840982578090080674), KQU(17956262740157449201), + KQU(18267979450492880548), KQU(11799503659796848070), + KQU(9942537025669672388), KQU(11886606816406990297), + KQU(5488594946437447576), KQU(7226714353282744302), + KQU(3784851653123877043), KQU(878018453244803041), + KQU(12110022586268616085), KQU(734072179404675123), + KQU(11869573627998248542), KQU(469150421297783998), KQU(260151124912803804), + KQU(11639179410120968649), KQU(9318165193840846253), + KQU(12795671722734758075), KQU(15318410297267253933), + KQU(691524703570062620), KQU(5837129010576994601), + KQU(15045963859726941052), KQU(5850056944932238169), + KQU(12017434144750943807), 
KQU(7447139064928956574), + KQU(3101711812658245019), KQU(16052940704474982954), + KQU(18195745945986994042), KQU(8932252132785575659), + KQU(13390817488106794834), KQU(11582771836502517453), + KQU(4964411326683611686), KQU(2195093981702694011), + KQU(14145229538389675669), KQU(16459605532062271798), + KQU(866316924816482864), KQU(4593041209937286377), KQU(8415491391910972138), + KQU(4171236715600528969), KQU(16637569303336782889), + KQU(2002011073439212680), KQU(17695124661097601411), + KQU(4627687053598611702), KQU(7895831936020190403), + KQU(8455951300917267802), KQU(2923861649108534854), + KQU(8344557563927786255), KQU(6408671940373352556), + KQU(12210227354536675772), KQU(14294804157294222295), + KQU(10103022425071085127), KQU(10092959489504123771), + KQU(6554774405376736268), KQU(12629917718410641774), + KQU(6260933257596067126), KQU(2460827021439369673), + KQU(2541962996717103668), KQU(597377203127351475), KQU(5316984203117315309), + KQU(4811211393563241961), KQU(13119698597255811641), + KQU(8048691512862388981), KQU(10216818971194073842), + KQU(4612229970165291764), KQU(10000980798419974770), + KQU(6877640812402540687), KQU(1488727563290436992), + KQU(2227774069895697318), KQU(11237754507523316593), + KQU(13478948605382290972), KQU(1963583846976858124), + KQU(5512309205269276457), KQU(3972770164717652347), + KQU(3841751276198975037), KQU(10283343042181903117), + KQU(8564001259792872199), KQU(16472187244722489221), + KQU(8953493499268945921), KQU(3518747340357279580), + KQU(4003157546223963073), KQU(3270305958289814590), + KQU(3966704458129482496), KQU(8122141865926661939), + KQU(14627734748099506653), KQU(13064426990862560568), + KQU(2414079187889870829), KQU(5378461209354225306), + KQU(10841985740128255566), KQU(538582442885401738), + KQU(7535089183482905946), KQU(16117559957598879095), + KQU(8477890721414539741), KQU(1459127491209533386), + KQU(17035126360733620462), KQU(8517668552872379126), + KQU(10292151468337355014), KQU(17081267732745344157), + 
KQU(13751455337946087178), KQU(14026945459523832966), + KQU(6653278775061723516), KQU(10619085543856390441), + KQU(2196343631481122885), KQU(10045966074702826136), + KQU(10082317330452718282), KQU(5920859259504831242), + KQU(9951879073426540617), KQU(7074696649151414158), + KQU(15808193543879464318), KQU(7385247772746953374), + KQU(3192003544283864292), KQU(18153684490917593847), + KQU(12423498260668568905), KQU(10957758099756378169), + KQU(11488762179911016040), KQU(2099931186465333782), + KQU(11180979581250294432), KQU(8098916250668367933), + KQU(3529200436790763465), KQU(12988418908674681745), + KQU(6147567275954808580), KQU(3207503344604030989), + KQU(10761592604898615360), KQU(229854861031893504), + KQU(8809853962667144291), KQU(13957364469005693860), + KQU(7634287665224495886), KQU(12353487366976556874), + KQU(1134423796317152034), KQU(2088992471334107068), + KQU(7393372127190799698), KQU(1845367839871058391), KQU(207922563987322884), + KQU(11960870813159944976), KQU(12182120053317317363), + KQU(17307358132571709283), KQU(13871081155552824936), + KQU(18304446751741566262), KQU(7178705220184302849), + KQU(10929605677758824425), KQU(16446976977835806844), + KQU(13723874412159769044), KQU(6942854352100915216), + KQU(1726308474365729390), KQU(2150078766445323155), + KQU(15345558947919656626), KQU(12145453828874527201), + KQU(2054448620739726849), KQU(2740102003352628137), + KQU(11294462163577610655), KQU(756164283387413743), + KQU(17841144758438810880), KQU(10802406021185415861), + KQU(8716455530476737846), KQU(6321788834517649606), + KQU(14681322910577468426), KQU(17330043563884336387), + KQU(12701802180050071614), KQU(14695105111079727151), + KQU(5112098511654172830), KQU(4957505496794139973), + KQU(8270979451952045982), KQU(12307685939199120969), + KQU(12425799408953443032), KQU(8376410143634796588), + KQU(16621778679680060464), KQU(3580497854566660073), + KQU(1122515747803382416), KQU(857664980960597599), KQU(6343640119895925918), + KQU(12878473260854462891), 
KQU(10036813920765722626), + KQU(14451335468363173812), KQU(5476809692401102807), + KQU(16442255173514366342), KQU(13060203194757167104), + KQU(14354124071243177715), KQU(15961249405696125227), + KQU(13703893649690872584), KQU(363907326340340064), + KQU(6247455540491754842), KQU(12242249332757832361), + KQU(156065475679796717), KQU(9351116235749732355), KQU(4590350628677701405), + KQU(1671195940982350389), KQU(13501398458898451905), + KQU(6526341991225002255), KQU(1689782913778157592), + KQU(7439222350869010334), KQU(13975150263226478308), + KQU(11411961169932682710), KQU(17204271834833847277), + KQU(541534742544435367), KQU(6591191931218949684), KQU(2645454775478232486), + KQU(4322857481256485321), KQU(8477416487553065110), + KQU(12902505428548435048), KQU(971445777981341415), + KQU(14995104682744976712), KQU(4243341648807158063), + KQU(8695061252721927661), KQU(5028202003270177222), + KQU(2289257340915567840), KQU(13870416345121866007), + KQU(13994481698072092233), KQU(6912785400753196481), + KQU(2278309315841980139), KQU(4329765449648304839), + KQU(5963108095785485298), KQU(4880024847478722478), + KQU(16015608779890240947), KQU(1866679034261393544), + KQU(914821179919731519), KQU(9643404035648760131), KQU(2418114953615593915), + KQU(944756836073702374), KQU(15186388048737296834), + KQU(7723355336128442206), KQU(7500747479679599691), + KQU(18013961306453293634), KQU(2315274808095756456), + KQU(13655308255424029566), KQU(17203800273561677098), + KQU(1382158694422087756), KQU(5090390250309588976), KQU(517170818384213989), + KQU(1612709252627729621), KQU(1330118955572449606), KQU(300922478056709885), + KQU(18115693291289091987), KQU(13491407109725238321), + KQU(15293714633593827320), KQU(5151539373053314504), + KQU(5951523243743139207), KQU(14459112015249527975), + KQU(5456113959000700739), KQU(3877918438464873016), + KQU(12534071654260163555), KQU(15871678376893555041), + KQU(11005484805712025549), KQU(16353066973143374252), + KQU(4358331472063256685), 
KQU(8268349332210859288), + KQU(12485161590939658075), KQU(13955993592854471343), + KQU(5911446886848367039), KQU(14925834086813706974), + KQU(6590362597857994805), KQU(1280544923533661875), + KQU(1637756018947988164), KQU(4734090064512686329), + KQU(16693705263131485912), KQU(6834882340494360958), + KQU(8120732176159658505), KQU(2244371958905329346), + KQU(10447499707729734021), KQU(7318742361446942194), + KQU(8032857516355555296), KQU(14023605983059313116), + KQU(1032336061815461376), KQU(9840995337876562612), + KQU(9869256223029203587), KQU(12227975697177267636), + KQU(12728115115844186033), KQU(7752058479783205470), + KQU(729733219713393087), KQU(12954017801239007622)}; +static const uint64_t init_by_array_64_expected[] = {KQU(2100341266307895239), + KQU(8344256300489757943), KQU(15687933285484243894), + KQU(8268620370277076319), KQU(12371852309826545459), + KQU(8800491541730110238), KQU(18113268950100835773), + KQU(2886823658884438119), KQU(3293667307248180724), + KQU(9307928143300172731), KQU(7688082017574293629), KQU(900986224735166665), + KQU(9977972710722265039), KQU(6008205004994830552), KQU(546909104521689292), + KQU(7428471521869107594), KQU(14777563419314721179), + KQU(16116143076567350053), KQU(5322685342003142329), + KQU(4200427048445863473), KQU(4693092150132559146), + KQU(13671425863759338582), KQU(6747117460737639916), + KQU(4732666080236551150), KQU(5912839950611941263), + KQU(3903717554504704909), KQU(2615667650256786818), + KQU(10844129913887006352), KQU(13786467861810997820), + KQU(14267853002994021570), KQU(13767807302847237439), + KQU(16407963253707224617), KQU(4802498363698583497), + KQU(2523802839317209764), KQU(3822579397797475589), + KQU(8950320572212130610), KQU(3745623504978342534), + KQU(16092609066068482806), KQU(9817016950274642398), + KQU(10591660660323829098), KQU(11751606650792815920), + KQU(5122873818577122211), KQU(17209553764913936624), + KQU(6249057709284380343), KQU(15088791264695071830), + KQU(15344673071709851930), 
KQU(4345751415293646084), + KQU(2542865750703067928), KQU(13520525127852368784), + KQU(18294188662880997241), KQU(3871781938044881523), + KQU(2873487268122812184), KQU(15099676759482679005), + KQU(15442599127239350490), KQU(6311893274367710888), + KQU(3286118760484672933), KQU(4146067961333542189), + KQU(13303942567897208770), KQU(8196013722255630418), + KQU(4437815439340979989), KQU(15433791533450605135), + KQU(4254828956815687049), KQU(1310903207708286015), + KQU(10529182764462398549), KQU(14900231311660638810), + KQU(9727017277104609793), KQU(1821308310948199033), + KQU(11628861435066772084), KQU(9469019138491546924), + KQU(3145812670532604988), KQU(9938468915045491919), + KQU(1562447430672662142), KQU(13963995266697989134), + KQU(3356884357625028695), KQU(4499850304584309747), + KQU(8456825817023658122), KQU(10859039922814285279), + KQU(8099512337972526555), KQU(348006375109672149), + KQU(11919893998241688603), KQU(1104199577402948826), + KQU(16689191854356060289), KQU(10992552041730168078), + KQU(7243733172705465836), KQU(5668075606180319560), + KQU(18182847037333286970), KQU(4290215357664631322), + KQU(4061414220791828613), KQU(13006291061652989604), + KQU(7140491178917128798), KQU(12703446217663283481), + KQU(5500220597564558267), KQU(10330551509971296358), + KQU(15958554768648714492), KQU(5174555954515360045), + KQU(1731318837687577735), KQU(3557700801048354857), + KQU(13764012341928616198), KQU(13115166194379119043), + KQU(7989321021560255519), KQU(2103584280905877040), + KQU(9230788662155228488), KQU(16396629323325547654), + KQU(657926409811318051), KQU(15046700264391400727), + KQU(5120132858771880830), KQU(7934160097989028561), + KQU(6963121488531976245), KQU(17412329602621742089), + KQU(15144843053931774092), KQU(17204176651763054532), + KQU(13166595387554065870), KQU(8590377810513960213), + KQU(5834365135373991938), KQU(7640913007182226243), + KQU(3479394703859418425), KQU(16402784452644521040), + KQU(4993979809687083980), KQU(13254522168097688865), + 
KQU(15643659095244365219), KQU(5881437660538424982), + KQU(11174892200618987379), KQU(254409966159711077), + KQU(17158413043140549909), KQU(3638048789290376272), + KQU(1376816930299489190), KQU(4622462095217761923), + KQU(15086407973010263515), KQU(13253971772784692238), + KQU(5270549043541649236), KQU(11182714186805411604), + KQU(12283846437495577140), KQU(5297647149908953219), + KQU(10047451738316836654), KQU(4938228100367874746), + KQU(12328523025304077923), KQU(3601049438595312361), + KQU(9313624118352733770), KQU(13322966086117661798), + KQU(16660005705644029394), KQU(11337677526988872373), + KQU(13869299102574417795), KQU(15642043183045645437), + KQU(3021755569085880019), KQU(4979741767761188161), + KQU(13679979092079279587), KQU(3344685842861071743), + KQU(13947960059899588104), KQU(305806934293368007), + KQU(5749173929201650029), KQU(11123724852118844098), + KQU(15128987688788879802), KQU(15251651211024665009), + KQU(7689925933816577776), KQU(16732804392695859449), + KQU(17087345401014078468), KQU(14315108589159048871), + KQU(4820700266619778917), KQU(16709637539357958441), + KQU(4936227875177351374), KQU(2137907697912987247), + KQU(11628565601408395420), KQU(2333250549241556786), + KQU(5711200379577778637), KQU(5170680131529031729), + KQU(12620392043061335164), KQU(95363390101096078), KQU(5487981914081709462), + KQU(1763109823981838620), KQU(3395861271473224396), + KQU(1300496844282213595), KQU(6894316212820232902), + KQU(10673859651135576674), KQU(5911839658857903252), + KQU(17407110743387299102), KQU(8257427154623140385), + KQU(11389003026741800267), KQU(4070043211095013717), + KQU(11663806997145259025), KQU(15265598950648798210), + KQU(630585789434030934), KQU(3524446529213587334), KQU(7186424168495184211), + KQU(10806585451386379021), KQU(11120017753500499273), + KQU(1586837651387701301), KQU(17530454400954415544), + KQU(9991670045077880430), KQU(7550997268990730180), + KQU(8640249196597379304), KQU(3522203892786893823), + KQU(10401116549878854788), 
KQU(13690285544733124852), + KQU(8295785675455774586), KQU(15535716172155117603), + KQU(3112108583723722511), KQU(17633179955339271113), + KQU(18154208056063759375), KQU(1866409236285815666), + KQU(13326075895396412882), KQU(8756261842948020025), + KQU(6281852999868439131), KQU(15087653361275292858), + KQU(10333923911152949397), KQU(5265567645757408500), + KQU(12728041843210352184), KQU(6347959327507828759), + KQU(154112802625564758), KQU(18235228308679780218), + KQU(3253805274673352418), KQU(4849171610689031197), + KQU(17948529398340432518), KQU(13803510475637409167), + KQU(13506570190409883095), KQU(15870801273282960805), + KQU(8451286481299170773), KQU(9562190620034457541), + KQU(8518905387449138364), KQU(12681306401363385655), + KQU(3788073690559762558), KQU(5256820289573487769), + KQU(2752021372314875467), KQU(6354035166862520716), + KQU(4328956378309739069), KQU(449087441228269600), KQU(5533508742653090868), + KQU(1260389420404746988), KQU(18175394473289055097), + KQU(1535467109660399420), KQU(8818894282874061442), + KQU(12140873243824811213), KQU(15031386653823014946), + KQU(1286028221456149232), KQU(6329608889367858784), + KQU(9419654354945132725), KQU(6094576547061672379), + KQU(17706217251847450255), KQU(1733495073065878126), + KQU(16918923754607552663), KQU(8881949849954945044), + KQU(12938977706896313891), KQU(14043628638299793407), + KQU(18393874581723718233), KQU(6886318534846892044), + KQU(14577870878038334081), KQU(13541558383439414119), + KQU(13570472158807588273), KQU(18300760537910283361), + KQU(818368572800609205), KQU(1417000585112573219), + KQU(12337533143867683655), KQU(12433180994702314480), + KQU(778190005829189083), KQU(13667356216206524711), + KQU(9866149895295225230), KQU(11043240490417111999), + KQU(1123933826541378598), KQU(6469631933605123610), + KQU(14508554074431980040), KQU(13918931242962026714), + KQU(2870785929342348285), KQU(14786362626740736974), + KQU(13176680060902695786), KQU(9591778613541679456), + KQU(9097662885117436706), 
KQU(749262234240924947), KQU(1944844067793307093), + KQU(4339214904577487742), KQU(8009584152961946551), + KQU(16073159501225501777), KQU(3335870590499306217), + KQU(17088312653151202847), KQU(3108893142681931848), + KQU(16636841767202792021), KQU(10423316431118400637), + KQU(8008357368674443506), KQU(11340015231914677875), + KQU(17687896501594936090), KQU(15173627921763199958), + KQU(542569482243721959), KQU(15071714982769812975), + KQU(4466624872151386956), KQU(1901780715602332461), + KQU(9822227742154351098), KQU(1479332892928648780), + KQU(6981611948382474400), KQU(7620824924456077376), + KQU(14095973329429406782), KQU(7902744005696185404), + KQU(15830577219375036920), KQU(10287076667317764416), + KQU(12334872764071724025), KQU(4419302088133544331), + KQU(14455842851266090520), KQU(12488077416504654222), + KQU(7953892017701886766), KQU(6331484925529519007), + KQU(4902145853785030022), KQU(17010159216096443073), + KQU(11945354668653886087), KQU(15112022728645230829), + KQU(17363484484522986742), KQU(4423497825896692887), + KQU(8155489510809067471), KQU(258966605622576285), KQU(5462958075742020534), + KQU(6763710214913276228), KQU(2368935183451109054), + KQU(14209506165246453811), KQU(2646257040978514881), + KQU(3776001911922207672), KQU(1419304601390147631), + KQU(14987366598022458284), KQU(3977770701065815721), + KQU(730820417451838898), KQU(3982991703612885327), KQU(2803544519671388477), + KQU(17067667221114424649), KQU(2922555119737867166), + KQU(1989477584121460932), KQU(15020387605892337354), + KQU(9293277796427533547), KQU(10722181424063557247), + KQU(16704542332047511651), KQU(5008286236142089514), + KQU(16174732308747382540), KQU(17597019485798338402), + KQU(13081745199110622093), KQU(8850305883842258115), + KQU(12723629125624589005), KQU(8140566453402805978), + KQU(15356684607680935061), KQU(14222190387342648650), + KQU(11134610460665975178), KQU(1259799058620984266), + KQU(13281656268025610041), KQU(298262561068153992), + KQU(12277871700239212922), 
KQU(13911297774719779438), + KQU(16556727962761474934), KQU(17903010316654728010), + KQU(9682617699648434744), KQU(14757681836838592850), + KQU(1327242446558524473), KQU(11126645098780572792), + KQU(1883602329313221774), KQU(2543897783922776873), + KQU(15029168513767772842), KQU(12710270651039129878), + KQU(16118202956069604504), KQU(15010759372168680524), + KQU(2296827082251923948), KQU(10793729742623518101), + KQU(13829764151845413046), KQU(17769301223184451213), + KQU(3118268169210783372), KQU(17626204544105123127), + KQU(7416718488974352644), KQU(10450751996212925994), + KQU(9352529519128770586), KQU(259347569641110140), KQU(8048588892269692697), + KQU(1774414152306494058), KQU(10669548347214355622), + KQU(13061992253816795081), KQU(18432677803063861659), + KQU(8879191055593984333), KQU(12433753195199268041), + KQU(14919392415439730602), KQU(6612848378595332963), + KQU(6320986812036143628), KQU(10465592420226092859), + KQU(4196009278962570808), KQU(3747816564473572224), + KQU(17941203486133732898), KQU(2350310037040505198), + KQU(5811779859134370113), KQU(10492109599506195126), + KQU(7699650690179541274), KQU(1954338494306022961), + KQU(14095816969027231152), KQU(5841346919964852061), + KQU(14945969510148214735), KQU(3680200305887550992), + KQU(6218047466131695792), KQU(8242165745175775096), + KQU(11021371934053307357), KQU(1265099502753169797), + KQU(4644347436111321718), KQU(3609296916782832859), + KQU(8109807992218521571), KQU(18387884215648662020), + KQU(14656324896296392902), KQU(17386819091238216751), + KQU(17788300878582317152), KQU(7919446259742399591), + KQU(4466613134576358004), KQU(12928181023667938509), + KQU(13147446154454932030), KQU(16552129038252734620), + KQU(8395299403738822450), KQU(11313817655275361164), + KQU(434258809499511718), KQU(2074882104954788676), KQU(7929892178759395518), + KQU(9006461629105745388), KQU(5176475650000323086), + KQU(11128357033468341069), KQU(12026158851559118955), + KQU(14699716249471156500), 
KQU(448982497120206757), + KQU(4156475356685519900), KQU(6063816103417215727), + KQU(10073289387954971479), KQU(8174466846138590962), + KQU(2675777452363449006), KQU(9090685420572474281), + KQU(6659652652765562060), KQU(12923120304018106621), + KQU(11117480560334526775), KQU(937910473424587511), + KQU(1838692113502346645), KQU(11133914074648726180), + KQU(7922600945143884053), KQU(13435287702700959550), + KQU(5287964921251123332), KQU(11354875374575318947), + KQU(17955724760748238133), KQU(13728617396297106512), + KQU(4107449660118101255), KQU(1210269794886589623), + KQU(11408687205733456282), KQU(4538354710392677887), + KQU(13566803319341319267), KQU(17870798107734050771), + KQU(3354318982568089135), KQU(9034450839405133651), + KQU(13087431795753424314), KQU(950333102820688239), + KQU(1968360654535604116), KQU(16840551645563314995), + KQU(8867501803892924995), KQU(11395388644490626845), + KQU(1529815836300732204), KQU(13330848522996608842), + KQU(1813432878817504265), KQU(2336867432693429560), + KQU(15192805445973385902), KQU(2528593071076407877), + KQU(128459777936689248), KQU(9976345382867214866), KQU(6208885766767996043), + KQU(14982349522273141706), KQU(3099654362410737822), + KQU(13776700761947297661), KQU(8806185470684925550), + KQU(8151717890410585321), KQU(640860591588072925), + KQU(14592096303937307465), KQU(9056472419613564846), + KQU(14861544647742266352), KQU(12703771500398470216), + KQU(3142372800384138465), KQU(6201105606917248196), + KQU(18337516409359270184), KQU(15042268695665115339), + KQU(15188246541383283846), KQU(12800028693090114519), + KQU(5992859621101493472), KQU(18278043971816803521), + KQU(9002773075219424560), KQU(7325707116943598353), + KQU(7930571931248040822), KQU(5645275869617023448), + KQU(7266107455295958487), KQU(4363664528273524411), + KQU(14313875763787479809), KQU(17059695613553486802), + KQU(9247761425889940932), KQU(13704726459237593128), + KQU(2701312427328909832), KQU(17235532008287243115), + KQU(14093147761491729538), 
KQU(6247352273768386516), + KQU(8268710048153268415), KQU(7985295214477182083), + KQU(15624495190888896807), KQU(3772753430045262788), + KQU(9133991620474991698), KQU(5665791943316256028), + KQU(7551996832462193473), KQU(13163729206798953877), + KQU(9263532074153846374), KQU(1015460703698618353), + KQU(17929874696989519390), KQU(18257884721466153847), + KQU(16271867543011222991), KQU(3905971519021791941), + KQU(16814488397137052085), KQU(1321197685504621613), + KQU(2870359191894002181), KQU(14317282970323395450), + KQU(13663920845511074366), KQU(2052463995796539594), + KQU(14126345686431444337), KQU(1727572121947022534), + KQU(17793552254485594241), KQU(6738857418849205750), + KQU(1282987123157442952), KQU(16655480021581159251), + KQU(6784587032080183866), KQU(14726758805359965162), + KQU(7577995933961987349), KQU(12539609320311114036), + KQU(10789773033385439494), KQU(8517001497411158227), + KQU(10075543932136339710), KQU(14838152340938811081), + KQU(9560840631794044194), KQU(17445736541454117475), + KQU(10633026464336393186), KQU(15705729708242246293), + KQU(1117517596891411098), KQU(4305657943415886942), + KQU(4948856840533979263), KQU(16071681989041789593), + KQU(13723031429272486527), KQU(7639567622306509462), + KQU(12670424537483090390), KQU(9715223453097197134), + KQU(5457173389992686394), KQU(289857129276135145), + KQU(17048610270521972512), KQU(692768013309835485), + KQU(14823232360546632057), KQU(18218002361317895936), + KQU(3281724260212650204), KQU(16453957266549513795), + KQU(8592711109774511881), KQU(929825123473369579), + KQU(15966784769764367791), KQU(9627344291450607588), + KQU(10849555504977813287), KQU(9234566913936339275), + KQU(6413807690366911210), KQU(10862389016184219267), + KQU(13842504799335374048), KQU(1531994113376881174), + KQU(2081314867544364459), KQU(16430628791616959932), + KQU(8314714038654394368), KQU(9155473892098431813), + KQU(12577843786670475704), KQU(4399161106452401017), + KQU(1668083091682623186), KQU(1741383777203714216), 
+ KQU(2162597285417794374), KQU(15841980159165218736), + KQU(1971354603551467079), KQU(1206714764913205968), + KQU(4790860439591272330), KQU(14699375615594055799), + KQU(8374423871657449988), KQU(10950685736472937738), + KQU(697344331343267176), KQU(10084998763118059810), + KQU(12897369539795983124), KQU(12351260292144383605), + KQU(1268810970176811234), KQU(7406287800414582768), KQU(516169557043807831), + KQU(5077568278710520380), KQU(3828791738309039304), + KQU(7721974069946943610), KQU(3534670260981096460), + KQU(4865792189600584891), KQU(16892578493734337298), + KQU(9161499464278042590), KQU(11976149624067055931), + KQU(13219479887277343990), KQU(14161556738111500680), + KQU(14670715255011223056), KQU(4671205678403576558), + KQU(12633022931454259781), KQU(14821376219869187646), + KQU(751181776484317028), KQU(2192211308839047070), + KQU(11787306362361245189), KQU(10672375120744095707), + KQU(4601972328345244467), KQU(15457217788831125879), + KQU(8464345256775460809), KQU(10191938789487159478), + KQU(6184348739615197613), KQU(11425436778806882100), + KQU(2739227089124319793), KQU(461464518456000551), KQU(4689850170029177442), + KQU(6120307814374078625), KQU(11153579230681708671), + KQU(7891721473905347926), KQU(10281646937824872400), + KQU(3026099648191332248), KQU(8666750296953273818), + KQU(14978499698844363232), KQU(13303395102890132065), + KQU(8182358205292864080), KQU(10560547713972971291), + KQU(11981635489418959093), KQU(3134621354935288409), + KQU(11580681977404383968), KQU(14205530317404088650), + KQU(5997789011854923157), KQU(13659151593432238041), + KQU(11664332114338865086), KQU(7490351383220929386), + KQU(7189290499881530378), KQU(15039262734271020220), + KQU(2057217285976980055), KQU(555570804905355739), + KQU(11235311968348555110), KQU(13824557146269603217), + KQU(16906788840653099693), KQU(7222878245455661677), + KQU(5245139444332423756), KQU(4723748462805674292), + KQU(12216509815698568612), KQU(17402362976648951187), + KQU(17389614836810366768), 
KQU(4880936484146667711), + KQU(9085007839292639880), KQU(13837353458498535449), + KQU(11914419854360366677), KQU(16595890135313864103), + KQU(6313969847197627222), KQU(18296909792163910431), + KQU(10041780113382084042), KQU(2499478551172884794), + KQU(11057894246241189489), KQU(9742243032389068555), + KQU(12838934582673196228), KQU(13437023235248490367), + KQU(13372420669446163240), KQU(6752564244716909224), + KQU(7157333073400313737), KQU(12230281516370654308), + KQU(1182884552219419117), KQU(2955125381312499218), + KQU(10308827097079443249), KQU(1337648572986534958), + KQU(16378788590020343939), KQU(108619126514420935), + KQU(3990981009621629188), KQU(5460953070230946410), + KQU(9703328329366531883), KQU(13166631489188077236), + KQU(1104768831213675170), KQU(3447930458553877908), + KQU(8067172487769945676), KQU(5445802098190775347), + KQU(3244840981648973873), KQU(17314668322981950060), + KQU(5006812527827763807), KQU(18158695070225526260), + KQU(2824536478852417853), KQU(13974775809127519886), + KQU(9814362769074067392), KQU(17276205156374862128), + KQU(11361680725379306967), KQU(3422581970382012542), + KQU(11003189603753241266), KQU(11194292945277862261), + KQU(6839623313908521348), KQU(11935326462707324634), + KQU(1611456788685878444), KQU(13112620989475558907), + KQU(517659108904450427), KQU(13558114318574407624), + KQU(15699089742731633077), KQU(4988979278862685458), + KQU(8111373583056521297), KQU(3891258746615399627), + KQU(8137298251469718086), KQU(12748663295624701649), + KQU(4389835683495292062), KQU(5775217872128831729), + KQU(9462091896405534927), KQU(8498124108820263989), + KQU(8059131278842839525), KQU(10503167994254090892), + KQU(11613153541070396656), KQU(18069248738504647790), + KQU(570657419109768508), KQU(3950574167771159665), KQU(5514655599604313077), + KQU(2908460854428484165), KQU(10777722615935663114), + KQU(12007363304839279486), KQU(9800646187569484767), + KQU(8795423564889864287), KQU(14257396680131028419), + KQU(6405465117315096498), 
KQU(7939411072208774878), + KQU(17577572378528990006), KQU(14785873806715994850), + KQU(16770572680854747390), KQU(18127549474419396481), + KQU(11637013449455757750), KQU(14371851933996761086), + KQU(3601181063650110280), KQU(4126442845019316144), + KQU(10198287239244320669), KQU(18000169628555379659), + KQU(18392482400739978269), KQU(6219919037686919957), + KQU(3610085377719446052), KQU(2513925039981776336), + KQU(16679413537926716955), KQU(12903302131714909434), + KQU(5581145789762985009), KQU(12325955044293303233), + KQU(17216111180742141204), KQU(6321919595276545740), + KQU(3507521147216174501), KQU(9659194593319481840), + KQU(11473976005975358326), KQU(14742730101435987026), + KQU(492845897709954780), KQU(16976371186162599676), + KQU(17712703422837648655), KQU(9881254778587061697), + KQU(8413223156302299551), KQU(1563841828254089168), + KQU(9996032758786671975), KQU(138877700583772667), + KQU(13003043368574995989), KQU(4390573668650456587), + KQU(8610287390568126755), KQU(15126904974266642199), + KQU(6703637238986057662), KQU(2873075592956810157), + KQU(6035080933946049418), KQU(13382846581202353014), + KQU(7303971031814642463), KQU(18418024405307444267), + KQU(5847096731675404647), KQU(4035880699639842500), + KQU(11525348625112218478), KQU(3041162365459574102), + KQU(2604734487727986558), KQU(15526341771636983145), + KQU(14556052310697370254), KQU(12997787077930808155), + KQU(9601806501755554499), KQU(11349677952521423389), + KQU(14956777807644899350), KQU(16559736957742852721), + KQU(12360828274778140726), KQU(6685373272009662513), + KQU(16932258748055324130), KQU(15918051131954158508), + KQU(1692312913140790144), KQU(546653826801637367), KQU(5341587076045986652), + KQU(14975057236342585662), KQU(12374976357340622412), + KQU(10328833995181940552), KQU(12831807101710443149), + KQU(10548514914382545716), KQU(2217806727199715993), + KQU(12627067369242845138), KQU(4598965364035438158), + KQU(150923352751318171), KQU(14274109544442257283), + 
KQU(4696661475093863031), KQU(1505764114384654516), + KQU(10699185831891495147), KQU(2392353847713620519), + KQU(3652870166711788383), KQU(8640653276221911108), + KQU(3894077592275889704), KQU(4918592872135964845), + KQU(16379121273281400789), KQU(12058465483591683656), + KQU(11250106829302924945), KQU(1147537556296983005), + KQU(6376342756004613268), KQU(14967128191709280506), + KQU(18007449949790627628), KQU(9497178279316537841), + KQU(7920174844809394893), KQU(10037752595255719907), + KQU(15875342784985217697), KQU(15311615921712850696), + KQU(9552902652110992950), KQU(14054979450099721140), + KQU(5998709773566417349), KQU(18027910339276320187), + KQU(8223099053868585554), KQU(7842270354824999767), + KQU(4896315688770080292), KQU(12969320296569787895), + KQU(2674321489185759961), KQU(4053615936864718439), + KQU(11349775270588617578), KQU(4743019256284553975), + KQU(5602100217469723769), KQU(14398995691411527813), + KQU(7412170493796825470), KQU(836262406131744846), KQU(8231086633845153022), + KQU(5161377920438552287), KQU(8828731196169924949), + KQU(16211142246465502680), KQU(3307990879253687818), + KQU(5193405406899782022), KQU(8510842117467566693), + KQU(6070955181022405365), KQU(14482950231361409799), + KQU(12585159371331138077), KQU(3511537678933588148), + KQU(2041849474531116417), KQU(10944936685095345792), + KQU(18303116923079107729), KQU(2720566371239725320), + KQU(4958672473562397622), KQU(3032326668253243412), + KQU(13689418691726908338), KQU(1895205511728843996), + KQU(8146303515271990527), KQU(16507343500056113480), + KQU(473996939105902919), KQU(9897686885246881481), + KQU(14606433762712790575), KQU(6732796251605566368), + KQU(1399778120855368916), KQU(935023885182833777), + KQU(16066282816186753477), KQU(7291270991820612055), + KQU(17530230393129853844), KQU(10223493623477451366), + KQU(15841725630495676683), KQU(17379567246435515824), + KQU(8588251429375561971), KQU(18339511210887206423), + KQU(17349587430725976100), KQU(12244876521394838088), + 
KQU(6382187714147161259), KQU(12335807181848950831), + KQU(16948885622305460665), KQU(13755097796371520506), + KQU(14806740373324947801), KQU(4828699633859287703), + KQU(8209879281452301604), KQU(12435716669553736437), + KQU(13970976859588452131), KQU(6233960842566773148), + KQU(12507096267900505759), KQU(1198713114381279421), + KQU(14989862731124149015), KQU(15932189508707978949), + KQU(2526406641432708722), KQU(29187427817271982), KQU(1499802773054556353), + KQU(10816638187021897173), KQU(5436139270839738132), + KQU(6659882287036010082), KQU(2154048955317173697), + KQU(10887317019333757642), KQU(16281091802634424955), + KQU(10754549879915384901), KQU(10760611745769249815), + KQU(2161505946972504002), KQU(5243132808986265107), + KQU(10129852179873415416), KQU(710339480008649081), + KQU(7802129453068808528), KQU(17967213567178907213), + KQU(15730859124668605599), KQU(13058356168962376502), + KQU(3701224985413645909), KQU(14464065869149109264), + KQU(9959272418844311646), KQU(10157426099515958752), + KQU(14013736814538268528), KQU(17797456992065653951), + KQU(17418878140257344806), KQU(15457429073540561521), + KQU(2184426881360949378), KQU(2062193041154712416), + KQU(8553463347406931661), KQU(4913057625202871854), + KQU(2668943682126618425), KQU(17064444737891172288), + KQU(4997115903913298637), KQU(12019402608892327416), + KQU(17603584559765897352), KQU(11367529582073647975), + KQU(8211476043518436050), KQU(8676849804070323674), + KQU(18431829230394475730), KQU(10490177861361247904), + KQU(9508720602025651349), KQU(7409627448555722700), + KQU(5804047018862729008), KQU(11943858176893142594), + KQU(11908095418933847092), KQU(5415449345715887652), + KQU(1554022699166156407), KQU(9073322106406017161), + KQU(7080630967969047082), KQU(18049736940860732943), + KQU(12748714242594196794), KQU(1226992415735156741), + KQU(17900981019609531193), KQU(11720739744008710999), + KQU(3006400683394775434), KQU(11347974011751996028), + KQU(3316999628257954608), 
KQU(8384484563557639101), + KQU(18117794685961729767), KQU(1900145025596618194), + KQU(17459527840632892676), KQU(5634784101865710994), + KQU(7918619300292897158), KQU(3146577625026301350), + KQU(9955212856499068767), KQU(1873995843681746975), + KQU(1561487759967972194), KQU(8322718804375878474), + KQU(11300284215327028366), KQU(4667391032508998982), + KQU(9820104494306625580), KQU(17922397968599970610), + KQU(1784690461886786712), KQU(14940365084341346821), + KQU(5348719575594186181), KQU(10720419084507855261), + KQU(14210394354145143274), KQU(2426468692164000131), + KQU(16271062114607059202), KQU(14851904092357070247), + KQU(6524493015693121897), KQU(9825473835127138531), + KQU(14222500616268569578), KQU(15521484052007487468), + KQU(14462579404124614699), KQU(11012375590820665520), + KQU(11625327350536084927), KQU(14452017765243785417), + KQU(9989342263518766305), KQU(3640105471101803790), + KQU(4749866455897513242), KQU(13963064946736312044), + KQU(10007416591973223791), KQU(18314132234717431115), + KQU(3286596588617483450), KQU(7726163455370818765), + KQU(7575454721115379328), KQU(5308331576437663422), + KQU(18288821894903530934), KQU(8028405805410554106), + KQU(15744019832103296628), KQU(149765559630932100), + KQU(6137705557200071977), KQU(14513416315434803615), + KQU(11665702820128984473), KQU(218926670505601386), + KQU(6868675028717769519), KQU(15282016569441512302), + KQU(5707000497782960236), KQU(6671120586555079567), + KQU(2194098052618985448), KQU(16849577895477330978), + KQU(12957148471017466283), KQU(1997805535404859393), + KQU(1180721060263860490), KQU(13206391310193756958), + KQU(12980208674461861797), KQU(3825967775058875366), + KQU(17543433670782042631), KQU(1518339070120322730), + KQU(16344584340890991669), KQU(2611327165318529819), + KQU(11265022723283422529), KQU(4001552800373196817), + KQU(14509595890079346161), KQU(3528717165416234562), + KQU(18153222571501914072), KQU(9387182977209744425), + KQU(10064342315985580021), 
KQU(11373678413215253977), + KQU(2308457853228798099), KQU(9729042942839545302), + KQU(7833785471140127746), KQU(6351049900319844436), + KQU(14454610627133496067), KQU(12533175683634819111), + KQU(15570163926716513029), KQU(13356980519185762498)}; TEST_BEGIN(test_gen_rand_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint32_t r32; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size32(), BLOCK_SIZE, - "Array size too small"); + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_gen_rand(1234); fill_array32(ctx, array32, BLOCK_SIZE); fill_array32(ctx, array32_2, BLOCK_SIZE); @@ -1486,13 +1405,12 @@ TEST_END TEST_BEGIN(test_by_array_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint32_t ini[4] = {0x1234, 0x5678, 0x9abc, 0xdef0}; uint32_t r32; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size32(), BLOCK_SIZE, - "Array size too small"); + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_by_array(ini, 4); fill_array32(ctx, array32, BLOCK_SIZE); fill_array32(ctx, array32_2, BLOCK_SIZE); @@ -1521,12 +1439,12 @@ TEST_END TEST_BEGIN(test_gen_rand_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint64_t r; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size64(), BLOCK_SIZE64, - "Array size too small"); + expect_d_le( + get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_gen_rand(4321); fill_array64(ctx, array64, BLOCK_SIZE64); fill_array64(ctx, array64_2, BLOCK_SIZE64); @@ -1540,13 +1458,13 @@ TEST_BEGIN(test_gen_rand_64) { } r = gen_rand64(ctx); expect_u64_eq(r, array64[i], - "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i, + "Mismatch at array64[%d]=%" FMTx64 ", gen=%" 
FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); expect_u64_eq(r, array64_2[i], - "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i, + "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64 "", i, array64_2[i], r); } fini_gen_rand(ctx); @@ -1556,13 +1474,13 @@ TEST_END TEST_BEGIN(test_by_array_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint64_t r; uint32_t ini[] = {5, 4, 3, 2, 1}; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size64(), BLOCK_SIZE64, - "Array size too small"); + expect_d_le( + get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_by_array(ini, 5); fill_array64(ctx, array64, BLOCK_SIZE64); fill_array64(ctx, array64_2, BLOCK_SIZE64); @@ -1576,13 +1494,13 @@ TEST_BEGIN(test_by_array_64) { } r = gen_rand64(ctx); expect_u64_eq(r, array64[i], - "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i, + "Mismatch at array64[%d]=%" FMTx64 " gen=%" FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); expect_u64_eq(r, array64_2[i], - "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i, + "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64, i, array64_2[i], r); } fini_gen_rand(ctx); @@ -1591,9 +1509,6 @@ TEST_END int main(void) { - return test( - test_gen_rand_32, - test_by_array_32, - test_gen_rand_64, + return test(test_gen_rand_32, test_by_array_32, test_gen_rand_64, test_by_array_64); } diff --git a/test/unit/a0.c b/test/unit/a0.c index c1be79a6..63d792d2 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -11,6 +11,5 @@ TEST_END int main(void) { - return test_no_malloc_init( - test_a0); + return test_no_malloc_init(test_a0); } diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 177ba505..99c08ab9 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -4,11 +4,11 @@ #include "jemalloc/internal/ticker.h" static nstime_monotonic_t 
*nstime_monotonic_orig; -static nstime_update_t *nstime_update_orig; +static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool monotonic_mock; +static bool monotonic_mock; static bool nstime_monotonic_mock(void) { @@ -28,26 +28,27 @@ TEST_BEGIN(test_decay_ticks) { test_skip_if(opt_hpa); ticker_geom_t *decay_ticker; - unsigned tick0, tick1, arena_ind; - size_t sz, large0; - void *p; + unsigned tick0, tick1, arena_ind; + size_t sz, large0; + void *p; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); /* Set up a manually managed arena for test. */ arena_ind = do_arena_create(0, 0); /* Migrate to the new arena, and get the ticker. */ unsigned old_arena_ind; - size_t sz_arena_ind = sizeof(old_arena_ind); + size_t sz_arena_ind = sizeof(old_arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, - &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); decay_ticker = tsd_arena_decay_tickerp_get(tsd_fetch()); - expect_ptr_not_null(decay_ticker, - "Unexpected failure getting decay ticker"); + expect_ptr_not_null( + decay_ticker, "Unexpected failure getting decay ticker"); /* * Test the standard APIs using a large size class, since we can't @@ -80,8 +81,8 @@ TEST_BEGIN(test_decay_ticks) { expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); tick1 = ticker_geom_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during posix_memalign()"); + expect_u32_ne( + tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). 
*/ @@ -89,8 +90,8 @@ TEST_BEGIN(test_decay_ticks) { p = aligned_alloc(sizeof(size_t), large0); expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); tick1 = ticker_geom_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during aligned_alloc()"); + expect_u32_ne( + tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); /* realloc(). */ @@ -118,7 +119,7 @@ TEST_BEGIN(test_decay_ticks) { */ { unsigned i; - size_t allocx_sizes[2]; + size_t allocx_sizes[2]; allocx_sizes[0] = large0; allocx_sizes[1] = 1; @@ -163,7 +164,8 @@ TEST_BEGIN(test_decay_ticks) { tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " - "(sz=%zu)", sz); + "(sz=%zu)", + sz); } } @@ -172,18 +174,19 @@ TEST_BEGIN(test_decay_ticks) { * using an explicit tcache. */ unsigned tcache_ind, i; - size_t tcache_sizes[2]; + size_t tcache_sizes[2]; tcache_sizes[0] = large0; tcache_sizes[1] = 1; size_t tcache_max, sz_tcache_max; sz_tcache_max = sizeof(tcache_max); expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); + &sz_tcache_max, NULL, 0), + 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { sz = tcache_sizes[i]; @@ -195,13 +198,14 @@ TEST_BEGIN(test_decay_ticks) { tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " - "(sz=%zu)", sz); + "(sz=%zu)", + sz); /* tcache flush. 
*/ dallocx(p, MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_geom_read(decay_ticker); expect_d_eq(mallctl("tcache.flush", NULL, NULL, - (void *)&tcache_ind, sizeof(unsigned)), 0, - "Unexpected mallctl failure"); + (void *)&tcache_ind, sizeof(unsigned)), + 0, "Unexpected mallctl failure"); tick1 = ticker_geom_read(decay_ticker); /* Will only tick if it's in tcache. */ @@ -231,11 +235,11 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, * cached slab were to repeatedly come and go during looping, it could * prevent the decay backlog ever becoming empty. */ - void *p = do_mallocx(1, flags); + void *p = do_mallocx(1, flags); uint64_t dirty_npurge1, muzzy_npurge1; do { for (unsigned i = 0; i < ARENA_DECAY_NTICKS_PER_UPDATE / 2; - i++) { + i++) { void *q = do_mallocx(1, flags); dallocx(q, flags); } @@ -244,14 +248,15 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, nstime_add(&time_mock, &update_interval); nstime_update(&time); - } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 == - dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) || - !terminate_asap)); + } while (nstime_compare(&time, &deadline) <= 0 + && ((dirty_npurge1 == dirty_npurge0 + && muzzy_npurge1 == muzzy_npurge0) + || !terminate_asap)); dallocx(p, flags); if (config_stats) { - expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + - muzzy_npurge0, "Expected purging to occur"); + expect_u64_gt(dirty_npurge1 + muzzy_npurge1, + dirty_npurge0 + muzzy_npurge0, "Expected purging to occur"); } #undef NINTERVALS } @@ -260,11 +265,11 @@ TEST_BEGIN(test_decay_ticker) { test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS 2048 - ssize_t ddt = opt_dirty_decay_ms; - ssize_t mdt = opt_muzzy_decay_ms; + ssize_t ddt = opt_dirty_decay_ms; + ssize_t mdt = opt_muzzy_decay_ms; unsigned arena_ind = do_arena_create(ddt, mdt); - int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; + int flags = 
(MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; /* * Allocate a bunch of large objects, pause the clock, deallocate every @@ -274,8 +279,9 @@ TEST_BEGIN(test_decay_ticker) { */ size_t large; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); do_purge(arena_ind); uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); @@ -302,9 +308,9 @@ TEST_BEGIN(test_decay_ticker) { "Expected nstime_update() to be called"); } - decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0, - muzzy_npurge0, true); - decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0, + decay_ticker_helper( + arena_ind, flags, true, ddt, dirty_npurge0, muzzy_npurge0, true); + decay_ticker_helper(arena_ind, flags, false, ddt + mdt, dirty_npurge0, muzzy_npurge0, false); do_arena_destroy(arena_ind); @@ -319,16 +325,17 @@ TEST_BEGIN(test_decay_nonmonotonic) { test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, large0; + size_t sz, large0; unsigned i, nupdates0; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); @@ -380,15 +387,15 @@ TEST_BEGIN(test_decay_now) { unsigned arena_ind = do_arena_create(0, 0); expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); expect_zu_eq(get_arena_pmuzzy(arena_ind), 
0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2}; /* Verify that dirty/muzzy pages never linger after deallocation. */ - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { size_t size = sizes[i]; generate_dirty(arena_ind, size); - expect_zu_eq(get_arena_pdirty(arena_ind), 0, - "Unexpected dirty pages"); - expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, - "Unexpected muzzy pages"); + expect_zu_eq( + get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq( + get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); } do_arena_destroy(arena_ind); } @@ -399,12 +406,12 @@ TEST_BEGIN(test_decay_never) { test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; - void *ptrs[sizeof(sizes)/sizeof(size_t)]; - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2}; + void *ptrs[sizeof(sizes) / sizeof(size_t)]; + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { ptrs[i] = do_mallocx(sizes[i], flags); } /* Verify that each deallocation generates additional dirty pages. 
*/ @@ -419,7 +426,7 @@ TEST_BEGIN(test_decay_never) { expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); size_t pmuzzy = get_arena_pmuzzy(arena_ind); @@ -434,10 +441,6 @@ TEST_END int main(void) { - return test( - test_decay_ticks, - test_decay_ticker, - test_decay_nonmonotonic, - test_decay_now, - test_decay_never); + return test(test_decay_ticks, test_decay_ticker, + test_decay_nonmonotonic, test_decay_now, test_decay_never); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 42fa9a5d..3e0f3d75 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -1,5 +1,5 @@ #ifndef ARENA_RESET_PROF_C_ -#include "test/jemalloc_test.h" +# include "test/jemalloc_test.h" #endif #include "jemalloc/internal/extent_mmap.h" @@ -10,7 +10,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -37,12 +37,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; } @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup( + tsdn, &arena_emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -84,20 +84,21 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, - "Unexpected mallctl() failure"); + size_t sz = sizeof(unsigned); + expect_d_eq( + mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), + 0, "Unexpected mallctl() failure"); return arena_ind; } static void do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { -#define NLARGE 32 +#define NLARGE 32 unsigned nsmall, nlarge, i; - size_t sz; - int flags; - tsdn_t *tsdn; + size_t sz; + int flags; + tsdn_t *tsdn; flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; @@ -132,14 +133,14 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { static void do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { - tsdn_t *tsdn; + tsdn_t *tsdn; unsigned i; tsdn = tsdn_fetch(); if (have_background_thread) { - malloc_mutex_lock(tsdn, - &background_thread_info_get(arena_ind)->mtx); + malloc_mutex_lock( + tsdn, &background_thread_info_get(arena_ind)->mtx); } /* Verify allocations no longer exist. 
*/ for (i = 0; i < nptrs; i++) { @@ -147,8 +148,8 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { "Allocation should no longer exist"); } if (have_background_thread) { - malloc_mutex_unlock(tsdn, - &background_thread_info_get(arena_ind)->mtx); + malloc_mutex_unlock( + tsdn, &background_thread_info_get(arena_ind)->mtx); } free(ptrs); @@ -159,7 +160,7 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) { size_t mib[3]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib(name, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -179,7 +180,7 @@ do_arena_destroy(unsigned arena_ind) { TEST_BEGIN(test_arena_reset) { unsigned arena_ind; - void **ptrs; + void **ptrs; unsigned nptrs; arena_ind = do_arena_create(NULL); @@ -191,23 +192,25 @@ TEST_END static bool arena_i_initialized(unsigned arena_ind, bool refresh) { - bool initialized; + bool initialized; size_t mib[3]; size_t miblen, sz; if (refresh) { uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); } - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; sz = sizeof(initialized); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, - 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); return initialized; } @@ -220,7 +223,7 @@ TEST_END TEST_BEGIN(test_arena_destroy_hooks_default) { unsigned arena_ind, arena_ind_another, arena_ind_prev; - void **ptrs; + void **ptrs; unsigned nptrs; 
arena_ind = do_arena_create(NULL); @@ -249,26 +252,27 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind_prev = arena_ind; arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - expect_u_eq(arena_ind, arena_ind_prev, - "Arena index should have been recycled"); + expect_u_eq( + arena_ind, arena_ind_prev, "Arena index should have been recycled"); do_arena_destroy(arena_ind); do_arena_reset_post(ptrs, nptrs, arena_ind); do_arena_destroy(arena_ind_another); /* Try arena.create with custom hooks. */ - size_t sz = sizeof(extent_hooks_t *); + size_t sz = sizeof(extent_hooks_t *); extent_hooks_t *a0_default_hooks; expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&a0_default_hooks, - &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); + &sz, NULL, 0), + 0, "Unexpected mallctlnametomib() failure"); /* Default impl; but wrapped as "customized". */ - extent_hooks_t new_hooks = *a0_default_hooks; + extent_hooks_t new_hooks = *a0_default_hooks; extent_hooks_t *hook = &new_hooks; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)&hook, sizeof(void *)), 0, - "Unexpected mallctl() failure"); + (void *)&hook, sizeof(void *)), + 0, "Unexpected mallctl() failure"); do_arena_destroy(arena_ind); } TEST_END @@ -280,13 +284,15 @@ TEST_END static bool extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? 
"true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { return true; @@ -301,21 +307,15 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, static extent_hooks_t hooks_orig; -static extent_hooks_t hooks_unmap = { - extent_alloc_hook, - extent_dalloc_unmap, /* dalloc */ - extent_destroy_hook, - extent_commit_hook, - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - extent_split_hook, - extent_merge_hook -}; +static extent_hooks_t hooks_unmap = {extent_alloc_hook, + extent_dalloc_unmap, /* dalloc */ + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, + extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook, + extent_merge_hook}; TEST_BEGIN(test_arena_destroy_hooks_unmap) { unsigned arena_ind; - void **ptrs; + void **ptrs; unsigned nptrs; extent_hooks_prep(); @@ -353,9 +353,6 @@ TEST_END int main(void) { - return test( - test_arena_reset, - test_arena_destroy_initial, - test_arena_destroy_hooks_default, - test_arena_destroy_hooks_unmap); + return test(test_arena_reset, test_arena_destroy_initial, + test_arena_destroy_hooks_default, test_arena_destroy_hooks_unmap); } diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 6c4b85e5..b4f59431 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -187,7 +187,6 @@ TEST_BEGIN(test_atomic_u64) { } TEST_END - TEST_STRUCT(uint32_t, u32); TEST_BEGIN(test_atomic_u32) { INTEGER_TEST_BODY(uint32_t, u32); @@ -212,7 +211,6 @@ TEST_BEGIN(test_atomic_zd) { } TEST_END - TEST_STRUCT(unsigned, u); TEST_BEGIN(test_atomic_u) { INTEGER_TEST_BODY(unsigned, u); @@ -221,11 +219,6 @@ TEST_END int main(void) { - return test( - test_atomic_u64, - test_atomic_u32, - test_atomic_p, 
- test_atomic_zu, - test_atomic_zd, - test_atomic_u); + return test(test_atomic_u64, test_atomic_u32, test_atomic_p, + test_atomic_zu, test_atomic_zd, test_atomic_u); } diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index c60010a8..819a81a6 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -4,14 +4,13 @@ static void test_switch_background_thread_ctl(bool new_val) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); e1 = new_val; - expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, - &e1, sz), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, !e1, - "background_thread should be %d before.\n", !e1); + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, + "Unexpected mallctl() failure"); + expect_b_eq(e0, !e1, "background_thread should be %d before.\n", !e1); if (e1) { expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); @@ -23,14 +22,13 @@ test_switch_background_thread_ctl(bool new_val) { static void test_repeat_background_thread_ctl(bool before) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); e1 = before; - expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, - &e1, sz), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, before, - "background_thread should be %d.\n", before); + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, + "Unexpected mallctl() failure"); + expect_b_eq(e0, before, "background_thread should be %d.\n", before); if (e1) { expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); @@ -43,15 +41,15 @@ test_repeat_background_thread_ctl(bool before) { TEST_BEGIN(test_background_thread_ctl) { test_skip_if(!have_background_thread); - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); - expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("background_thread", 
(void *)&e1, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, e1, - "Default and opt.background_thread does not match.\n"); + expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("background_thread", (void *)&e1, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_b_eq( + e0, e1, "Default and opt.background_thread does not match.\n"); if (e0) { test_switch_background_thread_ctl(false); } @@ -75,7 +73,7 @@ TEST_BEGIN(test_background_thread_running) { test_skip_if(!config_stats); #if defined(JEMALLOC_BACKGROUND_THREAD) - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); background_thread_info_t *info = &background_thread_info[0]; test_repeat_background_thread_ctl(false); @@ -113,6 +111,5 @@ int main(void) { /* Background_thread creation tests reentrancy naturally. */ return test_no_reentrancy( - test_background_thread_ctl, - test_background_thread_running); + test_background_thread_ctl, test_background_thread_running); } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 3a2d55ac..57f26c4b 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:8"; +const char *malloc_conf = + "background_thread:false,narenas:1,max_background_threads:8"; static unsigned max_test_narenas(void) { @@ -21,14 +22,14 @@ TEST_BEGIN(test_deferred) { test_skip_if(!have_background_thread); unsigned id; - size_t sz_u = sizeof(unsigned); + size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } - bool enable = true; + bool enable = true; size_t sz_b = sizeof(bool); expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable 
background threads"); @@ -44,29 +45,32 @@ TEST_BEGIN(test_max_background_threads) { size_t max_n_thds; size_t opt_max_n_thds; size_t sz_m = sizeof(max_n_thds); - expect_d_eq(mallctl("opt.max_background_threads", - &opt_max_n_thds, &sz_m, NULL, 0), 0, - "Failed to get opt.max_background_threads"); - expect_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, - 0), 0, "Failed to get max background threads"); + expect_d_eq(mallctl("opt.max_background_threads", &opt_max_n_thds, + &sz_m, NULL, 0), + 0, "Failed to get opt.max_background_threads"); + expect_d_eq( + mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, 0), 0, + "Failed to get max background threads"); expect_zu_eq(opt_max_n_thds, max_n_thds, "max_background_threads and " "opt.max_background_threads should match"); - expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, - sz_m), 0, "Failed to set max background threads"); + expect_d_eq( + mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, + "Failed to set max background threads"); size_t size_zero = 0; - expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, - sz_m), 0, "Should not allow zero background threads"); + expect_d_ne( + mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0, + "Should not allow zero background threads"); unsigned id; - size_t sz_u = sizeof(unsigned); + size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } - bool enable = true; + bool enable = true; size_t sz_b = sizeof(bool); expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); @@ -75,16 +79,18 @@ TEST_BEGIN(test_max_background_threads) { size_t new_max_thds = max_n_thds - 1; if (new_max_thds > 0) { expect_d_eq(mallctl("max_background_threads", NULL, NULL, - &new_max_thds, sz_m), 0, - "Failed to set max background threads"); 
+ &new_max_thds, sz_m), + 0, "Failed to set max background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should decrease by 1.\n"); } new_max_thds = 1; - expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, - sz_m), 0, "Failed to set max background threads"); - expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, - sz_m), 0, "Should not allow zero background threads"); + expect_d_eq( + mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), + 0, "Failed to set max background threads"); + expect_d_ne( + mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0, + "Should not allow zero background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } @@ -92,7 +98,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_deferred, - test_max_background_threads); + return test_no_reentrancy(test_deferred, test_max_background_threads); } diff --git a/test/unit/base.c b/test/unit/base.c index 3e46626e..e6e82435 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -3,37 +3,31 @@ #include "test/extent_hooks.h" static extent_hooks_t hooks_null = { - extent_alloc_hook, - NULL, /* dalloc */ - NULL, /* destroy */ - NULL, /* commit */ - NULL, /* decommit */ - NULL, /* purge_lazy */ - NULL, /* purge_forced */ - NULL, /* split */ - NULL /* merge */ + extent_alloc_hook, NULL, /* dalloc */ + NULL, /* destroy */ + NULL, /* commit */ + NULL, /* decommit */ + NULL, /* purge_lazy */ + NULL, /* purge_forced */ + NULL, /* split */ + NULL /* merge */ }; static extent_hooks_t hooks_not_null = { - extent_alloc_hook, - extent_dalloc_hook, - extent_destroy_hook, - NULL, /* commit */ - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - NULL, /* split */ - NULL /* merge */ + extent_alloc_hook, extent_dalloc_hook, extent_destroy_hook, + NULL, /* commit */ + extent_decommit_hook, extent_purge_lazy_hook, 
extent_purge_forced_hook, + NULL, /* split */ + NULL /* merge */ }; TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, edata_allocated, - rtree_allocated, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, rtree_allocated, + resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks, + base = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks, /* metadata_use_hooks */ true); if (config_stats) { @@ -42,13 +36,13 @@ TEST_BEGIN(test_base_hooks_default) { expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - expect_zu_gt(n_thp, 0, - "Base should have 1 THP at least."); + expect_zu_gt( + n_thp, 0, "Base should have 1 THP at least."); } } - expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &edata_allocated, @@ -63,9 +57,9 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; - base_t *base; - size_t allocated0, allocated1, edata_allocated, - rtree_allocated, resident, mapped, n_thp; + base_t *base; + size_t allocated0, allocated1, edata_allocated, rtree_allocated, + resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -86,13 +80,13 @@ TEST_BEGIN(test_base_hooks_null) { expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - expect_zu_gt(n_thp, 0, - "Base should have 1 THP at least."); + expect_zu_gt( + n_thp, 0, "Base should have 1 THP at least."); } } - expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if 
(config_stats) { base_stats_get(tsdn, base, &allocated1, &edata_allocated, @@ -109,8 +103,8 @@ TEST_END TEST_BEGIN(test_base_hooks_not_null) { extent_hooks_t hooks_orig; - base_t *base; - void *p, *q, *r, *r_exp; + base_t *base; + void *p, *q, *r, *r_exp; extent_hooks_prep(); try_dalloc = false; @@ -133,33 +127,34 @@ TEST_BEGIN(test_base_hooks_not_null) { */ { const size_t alignments[] = { - 1, - QUANTUM, - QUANTUM << 1, - CACHELINE, - CACHELINE << 1, + 1, + QUANTUM, + QUANTUM << 1, + CACHELINE, + CACHELINE << 1, }; unsigned i; for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; - size_t align_ceil = ALIGNMENT_CEILING(alignment, - QUANTUM); + size_t align_ceil = ALIGNMENT_CEILING( + alignment, QUANTUM); p = base_alloc(tsdn, base, 1, alignment); - expect_ptr_not_null(p, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + p, "Unexpected base_alloc() failure"); expect_ptr_eq(p, - (void *)(ALIGNMENT_CEILING((uintptr_t)p, - alignment)), "Expected quantum alignment"); + (void *)(ALIGNMENT_CEILING( + (uintptr_t)p, alignment)), + "Expected quantum alignment"); q = base_alloc(tsdn, base, alignment, alignment); - expect_ptr_not_null(q, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + q, "Unexpected base_alloc() failure"); expect_ptr_eq((void *)((uintptr_t)p + align_ceil), q, "Minimal allocation should take up %zu bytes", align_ceil); r = base_alloc(tsdn, base, 1, alignment); - expect_ptr_not_null(r, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + r, "Unexpected base_alloc() failure"); expect_ptr_eq((void *)((uintptr_t)q + align_ceil), r, "Minimal allocation should take up %zu bytes", align_ceil); @@ -193,21 +188,18 @@ TEST_BEGIN(test_base_hooks_not_null) { * Check for proper alignment support when normal blocks are too small. 
*/ { - const size_t alignments[] = { - HUGEPAGE, - HUGEPAGE << 1 - }; - unsigned i; + const size_t alignments[] = {HUGEPAGE, HUGEPAGE << 1}; + unsigned i; for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; p = base_alloc(tsdn, base, QUANTUM, alignment); - expect_ptr_not_null(p, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + p, "Unexpected base_alloc() failure"); expect_ptr_eq(p, - (void *)(ALIGNMENT_CEILING((uintptr_t)p, - alignment)), "Expected %zu-byte alignment", - alignment); + (void *)(ALIGNMENT_CEILING( + (uintptr_t)p, alignment)), + "Expected %zu-byte alignment", alignment); } } @@ -237,12 +229,11 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_default_hook) { base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ false); ehooks_t *ehooks = base_ehooks_get_for_metadata(base); expect_true(ehooks_are_default(ehooks), - "Expected default extent hook functions pointer"); + "Expected default extent hook functions pointer"); base_delete(tsdn, base); } TEST_END - TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { extent_hooks_prep(); memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); @@ -251,17 +242,15 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); ehooks_t *ehooks = base_ehooks_get_for_metadata(base); expect_ptr_eq(&hooks, ehooks_get_extent_hooks_ptr(ehooks), - "Expected user-specified extend hook functions pointer"); + "Expected user-specified extend hook functions pointer"); base_delete(tsdn, base); } TEST_END int main(void) { - return test( - test_base_hooks_default, - test_base_hooks_null, + return test(test_base_hooks_default, test_base_hooks_null, test_base_hooks_not_null, - test_base_ehooks_get_for_metadata_default_hook, - test_base_ehooks_get_for_metadata_custom_hook); + test_base_ehooks_get_for_metadata_default_hook, + test_base_ehooks_get_for_metadata_custom_hook); } diff --git 
a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 2bd5968e..0c61bf77 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -6,8 +6,8 @@ static void *global_ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) static void -verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, - bool zero) { +verify_batch_basic( + tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) { for (size_t i = 0; i < batch; ++i) { void *p = ptrs[i]; expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); @@ -46,7 +46,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, assert(i > 0); void *q = ptrs[i - 1]; expect_true((uintptr_t)p > (uintptr_t)q - && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, ""); + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, + ""); } } @@ -62,16 +63,17 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static size_t batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) { batch_alloc_packet_t batch_alloc_packet = {ptrs, num, size, flags}; - size_t filled; - size_t len = sizeof(size_t); + size_t filled; + size_t len = sizeof(size_t); assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, - &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + &batch_alloc_packet, sizeof(batch_alloc_packet)), + 0, ""); return filled; } @@ -79,16 +81,16 @@ static void test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { tsd_t *tsd = tsd_fetch(); assert(tsd != NULL); - const size_t usize = - (alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size)); - const szind_t ind = sz_size2index(usize); + const size_t usize = (alignment != 0 ? 
sz_sa2u(size, alignment) + : sz_s2u(size)); + const szind_t ind = sz_size2index(usize); const bin_info_t *bin_info = &bin_infos[ind]; - const unsigned nregs = bin_info->nregs; + const unsigned nregs = bin_info->nregs; assert(nregs > 0); arena_t *arena; if (arena_flag != 0) { - arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), - false); + arena = arena_get( + tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), false); } else { arena = arena_choose(tsd, NULL); } @@ -122,13 +124,13 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - size_t filled = batch_alloc_wrapper(global_ptrs, batch, - size, flags); + size_t filled = batch_alloc_wrapper( + global_ptrs, batch, size, flags); assert_zu_eq(filled, batch, ""); - verify_batch_basic(tsd, global_ptrs, batch, usize, - zero); - verify_batch_locality(tsd, global_ptrs, batch, usize, - arena, nregs); + verify_batch_basic( + tsd, global_ptrs, batch, usize, zero); + verify_batch_locality( + tsd, global_ptrs, batch, usize, arena, nregs); release_batch(global_ptrs, batch, usize); } } @@ -153,9 +155,10 @@ TEST_END TEST_BEGIN(test_batch_alloc_manual_arena) { unsigned arena_ind; - size_t len_unsigned = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, - 0), 0, ""); + size_t len_unsigned = sizeof(unsigned); + assert_d_eq( + mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, 0), 0, + ""); test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind)); } TEST_END @@ -180,10 +183,7 @@ TEST_END int main(void) { - return test( - test_batch_alloc, - test_batch_alloc_zero, - test_batch_alloc_aligned, - test_batch_alloc_manual_arena, + return test(test_batch_alloc, test_batch_alloc_zero, + test_batch_alloc_aligned, test_batch_alloc_manual_arena, test_batch_alloc_large); } diff --git a/test/unit/batcher.c b/test/unit/batcher.c index df9d3e5b..1052ca27 100644 --- a/test/unit/batcher.c +++ 
b/test/unit/batcher.c @@ -5,7 +5,7 @@ TEST_BEGIN(test_simple) { enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; batcher_t batcher; - size_t data[NELEMS_MAX]; + size_t data[NELEMS_MAX]; for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { batcher_init(&batcher, nelems); for (int run = 0; run < NRUNS; run++) { @@ -13,8 +13,8 @@ TEST_BEGIN(test_simple) { data[i] = (size_t)-1; } for (size_t i = 0; i < nelems; i++) { - size_t idx = batcher_push_begin(TSDN_NULL, - &batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &batcher, 1); assert_zu_eq(i, idx, "Wrong index"); assert_zu_eq((size_t)-1, data[idx], "Expected uninitialized slot"); @@ -22,8 +22,8 @@ TEST_BEGIN(test_simple) { batcher_push_end(TSDN_NULL, &batcher); } if (nelems > 0) { - size_t idx = batcher_push_begin(TSDN_NULL, - &batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &batcher, 1); assert_zu_eq(BATCHER_NO_IDX, idx, "Shouldn't be able to push into a full " "batcher"); @@ -51,7 +51,7 @@ TEST_BEGIN(test_simple) { TEST_END TEST_BEGIN(test_multi_push) { - size_t idx, nelems; + size_t idx, nelems; batcher_t batcher; batcher_init(&batcher, 11); /* Push two at a time, 5 times, for 10 total. 
*/ @@ -82,13 +82,13 @@ enum { typedef struct stress_test_data_s stress_test_data_t; struct stress_test_data_s { - batcher_t batcher; - mtx_t pop_mtx; + batcher_t batcher; + mtx_t pop_mtx; atomic_u32_t thread_id; - uint32_t elems_data[STRESS_TEST_ELEMS]; - size_t push_count[STRESS_TEST_ELEMS]; - size_t pop_count[STRESS_TEST_ELEMS]; + uint32_t elems_data[STRESS_TEST_ELEMS]; + size_t push_count[STRESS_TEST_ELEMS]; + size_t pop_count[STRESS_TEST_ELEMS]; atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; }; @@ -108,7 +108,8 @@ get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { return i; } } - assert_not_reached("Asked for the %zu'th set element when < %zu are " + assert_not_reached( + "Asked for the %zu'th set element when < %zu are " "set", n, n); /* Just to silence a compiler warning. */ @@ -118,20 +119,19 @@ get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { static void * stress_test_thd(void *arg) { stress_test_data_t *data = arg; - size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, - ATOMIC_RELAXED); + size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED); size_t nelems_owned = 0; - bool elems_owned[STRESS_TEST_ELEMS] = {0}; + bool elems_owned[STRESS_TEST_ELEMS] = {0}; size_t local_push_count[STRESS_TEST_ELEMS] = {0}; size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; for (int i = 0; i < STRESS_TEST_OPS; i++) { - size_t rnd = prng_range_zu(&prng, - STRESS_TEST_PUSH_TO_POP_RATIO); + size_t rnd = prng_range_zu( + &prng, STRESS_TEST_PUSH_TO_POP_RATIO); if (rnd == 0 || nelems_owned == 0) { - size_t nelems = batcher_pop_begin(TSDN_NULL, - &data->batcher); + size_t nelems = batcher_pop_begin( + TSDN_NULL, &data->batcher); if (nelems == BATCHER_NO_IDX) { continue; } @@ -147,19 +147,18 @@ stress_test_thd(void *arg) { } batcher_pop_end(TSDN_NULL, &data->batcher); } else { - size_t elem_to_push_idx = prng_range_zu(&prng, - nelems_owned); - size_t elem = get_nth_set(elems_owned, - 
elem_to_push_idx); - assert_true( - elems_owned[elem], + size_t elem_to_push_idx = prng_range_zu( + &prng, nelems_owned); + size_t elem = get_nth_set( + elems_owned, elem_to_push_idx); + assert_true(elems_owned[elem], "Should own element we're about to pop"); elems_owned[elem] = false; local_push_count[elem]++; data->push_count[elem]++; nelems_owned--; - size_t idx = batcher_push_begin(TSDN_NULL, - &data->batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &data->batcher, 1); assert_zu_ne(idx, BATCHER_NO_IDX, "Batcher can't be full -- we have one of its " "elems!"); @@ -171,10 +170,10 @@ stress_test_thd(void *arg) { /* Push all local elems back, flush local counts to the shared ones. */ size_t push_idx = 0; if (nelems_owned != 0) { - push_idx = batcher_push_begin(TSDN_NULL, &data->batcher, - nelems_owned); - assert_zu_ne(BATCHER_NO_IDX, push_idx, - "Should be space to push"); + push_idx = batcher_push_begin( + TSDN_NULL, &data->batcher, nelems_owned); + assert_zu_ne( + BATCHER_NO_IDX, push_idx, "Should be space to push"); } for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { if (elems_owned[i]) { @@ -183,12 +182,10 @@ stress_test_thd(void *arg) { local_push_count[i]++; data->push_count[i]++; } - atomic_fetch_add_zu( - &data->atomic_push_count[i], local_push_count[i], - ATOMIC_RELAXED); - atomic_fetch_add_zu( - &data->atomic_pop_count[i], local_pop_count[i], - ATOMIC_RELAXED); + atomic_fetch_add_zu(&data->atomic_push_count[i], + local_push_count[i], ATOMIC_RELAXED); + atomic_fetch_add_zu(&data->atomic_pop_count[i], + local_pop_count[i], ATOMIC_RELAXED); } if (nelems_owned != 0) { batcher_push_end(TSDN_NULL, &data->batcher); @@ -223,8 +220,8 @@ TEST_BEGIN(test_stress) { thd_join(threads[i], NULL); } for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - assert_zu_ne(0, data.push_count[i], - "Should have done something!"); + assert_zu_ne( + 0, data.push_count[i], "Should have done something!"); assert_zu_eq(data.push_count[i], data.pop_count[i], "every element should 
be pushed and popped an equal number " "of times"); diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index a20062fd..a422586d 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -9,10 +9,10 @@ enum { typedef struct stress_thread_data_s stress_thread_data_t; struct stress_thread_data_s { - unsigned thd_id; + unsigned thd_id; atomic_zu_t *ready_thds; atomic_zu_t *done_thds; - void **to_dalloc; + void **to_dalloc; }; static atomic_zu_t push_failure_count; @@ -68,19 +68,19 @@ increment_pop_attempt(size_t elems_to_pop) { static void increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { if (slab_dalloc_count > 0) { - atomic_fetch_add_zu(&dalloc_nonzero_slab_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED); } else { - atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED); } if (!list_empty) { - atomic_fetch_add_zu(&dalloc_nonempty_list_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &dalloc_nonempty_list_count, 1, ATOMIC_RELAXED); } } -static void flush_tcache() { +static void +flush_tcache() { assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), "Unexpected mallctl failure"); } @@ -88,7 +88,7 @@ static void flush_tcache() { static void * stress_thread(void *arg) { stress_thread_data_t *data = arg; - uint64_t prng_state = data->thd_id; + uint64_t prng_state = data->thd_id; atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) != STRESS_THREADS) { @@ -99,7 +99,6 @@ stress_thread(void *arg) { if (prng_range_u64(&prng_state, 3) == 0) { flush_tcache(); } - } flush_tcache(); atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); @@ -125,9 +124,9 @@ stress_run(void (*main_thread_fn)(), int nruns) { atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); for (int run = 0; run < nruns; run++) { - thd_t 
thds[STRESS_THREADS]; + thd_t thds[STRESS_THREADS]; stress_thread_data_t thd_datas[STRESS_THREADS]; - atomic_zu_t ready_thds; + atomic_zu_t ready_thds; atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); atomic_zu_t done_thds; atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); @@ -164,7 +163,7 @@ stress_run(void (*main_thread_fn)(), int nruns) { static void do_allocs_frees() { - enum {NALLOCS = 32}; + enum { NALLOCS = 32 }; flush_tcache(); void *ptrs[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { @@ -182,7 +181,7 @@ test_arena_reset_main_fn() { } TEST_BEGIN(test_arena_reset) { - int err; + int err; unsigned arena; unsigned old_arena; @@ -256,17 +255,16 @@ TEST_BEGIN(test_races) { "Should have seen some pop successes"); assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), "Expected some frees that didn't empty a slab"); - assert_zu_lt(0, atomic_load_zu(&dalloc_nonzero_slab_count, - ATOMIC_RELAXED), "expected some frees that emptied a slab"); - assert_zu_lt(0, atomic_load_zu(&dalloc_nonempty_list_count, - ATOMIC_RELAXED), "expected some frees that used the empty list"); + assert_zu_lt(0, + atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED), + "expected some frees that emptied a slab"); + assert_zu_lt(0, + atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED), + "expected some frees that used the empty list"); } TEST_END int main(void) { - return test_no_reentrancy( - test_arena_reset, - test_races, - test_fork); + return test_no_reentrancy(test_arena_reset, test_races, test_fork); } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 040ea54d..c3e1c2d6 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -7,9 +7,9 @@ static void * thd_producer(void *varg) { - void **mem = varg; + void **mem = varg; unsigned arena, i; - size_t sz; + size_t sz; sz = sizeof(arena); /* Remote arena. 
*/ @@ -28,8 +28,8 @@ thd_producer(void *varg) { } TEST_BEGIN(test_producer_consumer) { - thd_t thds[NTHREADS]; - void *mem[NTHREADS][REMOTE_NALLOC]; + thd_t thds[NTHREADS]; + void *mem[NTHREADS][REMOTE_NALLOC]; unsigned i; /* Create producer threads to allocate. */ @@ -42,8 +42,8 @@ TEST_BEGIN(test_producer_consumer) { /* Remote deallocation by the current thread. */ for (i = 0; i < NTHREADS; i++) { for (unsigned j = 0; j < REMOTE_NALLOC; j++) { - expect_ptr_not_null(mem[i][j], - "Unexpected remote allocation failure"); + expect_ptr_not_null( + mem[i][j], "Unexpected remote allocation failure"); dallocx(mem[i][j], 0); } } @@ -52,7 +52,7 @@ TEST_END static void * thd_start(void *varg) { - void *ptr, *ptr2; + void *ptr, *ptr2; edata_t *edata; unsigned shard1, shard2; @@ -82,10 +82,10 @@ thd_start(void *varg) { } TEST_BEGIN(test_bin_shard_mt) { - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, NULL); @@ -104,8 +104,8 @@ TEST_END TEST_BEGIN(test_bin_shard) { unsigned nbins, i; - size_t mib[4], mib2[4]; - size_t miblen, miblen2, len; + size_t mib[4], mib2[4]; + size_t miblen, miblen2, len; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, @@ -120,17 +120,19 @@ TEST_BEGIN(test_bin_shard) { for (i = 0; i < nbins; i++) { uint32_t nshards; - size_t size, sz1, sz2; + size_t size, sz1, sz2; mib[2] = i; sz1 = sizeof(nshards); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&nshards, &sz1, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); mib2[2] = i; sz2 = sizeof(size); - expect_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + 
expect_d_eq( + mallctlbymib(mib2, miblen2, (void *)&size, &sz2, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); if (size >= 1 && size <= 128) { expect_u_eq(nshards, 16, "Unexpected nshards"); @@ -148,7 +150,5 @@ TEST_END int main(void) { return test_no_reentrancy( - test_bin_shard, - test_bin_shard_mt, - test_producer_consumer); + test_bin_shard, test_bin_shard_mt, test_producer_consumer); } diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 4e9d2e16..986562d1 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -2,36 +2,37 @@ #include "jemalloc/internal/bit_util.h" -#define TEST_POW2_CEIL(t, suf, pri) do { \ - unsigned i, pow2; \ - t x; \ - \ - expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ - \ - for (i = 0; i < sizeof(t) * 8; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ - << i, "Unexpected result"); \ - } \ - \ - for (i = 2; i < sizeof(t) * 8; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ - ((t)1) << i, "Unexpected result"); \ - } \ - \ - for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ - ((t)1) << (i+1), "Unexpected result"); \ - } \ - \ - for (pow2 = 1; pow2 < 25; pow2++) { \ - for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ - x++) { \ - expect_##suf##_eq(pow2_ceil_##suf(x), \ - ((t)1) << pow2, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ -} while (0) +#define TEST_POW2_CEIL(t, suf, pri) \ + do { \ + unsigned i, pow2; \ + t x; \ + \ + expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i 
+ 1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2 - 1)) + 1; \ + x <= ((t)1) << pow2; x++) { \ + expect_##suf##_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%" pri, x); \ + } \ + } \ + } while (0) TEST_BEGIN(test_pow2_ceil_u64) { TEST_POW2_CEIL(uint64_t, u64, FMTu64); @@ -54,10 +55,10 @@ expect_lg_ceil_range(size_t input, unsigned answer) { expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); return; } - expect_zu_le(input, (ZU(1) << answer), - "Got %u as lg_ceil of %zu", answer, input); - expect_zu_gt(input, (ZU(1) << (answer - 1)), - "Got %u as lg_ceil of %zu", answer, input); + expect_zu_le(input, (ZU(1) << answer), "Got %u as lg_ceil of %zu", + answer, input); + expect_zu_gt(input, (ZU(1) << (answer - 1)), "Got %u as lg_ceil of %zu", + answer, input); } static void @@ -66,8 +67,8 @@ expect_lg_floor_range(size_t input, unsigned answer) { expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); return; } - expect_zu_ge(input, (ZU(1) << answer), - "Got %u as lg_floor of %zu", answer, input); + expect_zu_ge(input, (ZU(1) << answer), "Got %u as lg_floor of %zu", + answer, input); expect_zu_lt(input, (ZU(1) << (answer + 1)), "Got %u as lg_floor of %zu", answer, input); } @@ -101,22 +102,24 @@ TEST_BEGIN(test_lg_ceil_floor) { } TEST_END -#define TEST_FFS(t, suf, test_suf, pri) do { \ - for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ - for (unsigned j = 0; j <= i; j++) { \ - for (unsigned k = 0; k <= j; k++) { \ - t x = (t)1 << i; \ - x |= (t)1 << j; \ - x |= (t)1 << k; \ - expect_##test_suf##_eq(ffs_##suf(x), k, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ - } \ -} while(0) +#define TEST_FFS(t, suf, test_suf, pri) \ + do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(ffs_##suf(x), \ + k, "Unexpected 
result, x=%" pri, \ + x); \ + } \ + } \ + } \ + } while (0) TEST_BEGIN(test_ffs_u) { - TEST_FFS(unsigned, u, u,"u"); + TEST_FFS(unsigned, u, u, "u"); } TEST_END @@ -145,22 +148,24 @@ TEST_BEGIN(test_ffs_zu) { } TEST_END -#define TEST_FLS(t, suf, test_suf, pri) do { \ - for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ - for (unsigned j = 0; j <= i; j++) { \ - for (unsigned k = 0; k <= j; k++) { \ - t x = (t)1 << i; \ - x |= (t)1 << j; \ - x |= (t)1 << k; \ - expect_##test_suf##_eq(fls_##suf(x), i, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ - } \ -} while(0) +#define TEST_FLS(t, suf, test_suf, pri) \ + do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(fls_##suf(x), \ + i, "Unexpected result, x=%" pri, \ + x); \ + } \ + } \ + } \ + } while (0) TEST_BEGIN(test_fls_u) { - TEST_FLS(unsigned, u, u,"u"); + TEST_FLS(unsigned, u, u, "u"); } TEST_END @@ -190,7 +195,7 @@ TEST_BEGIN(test_fls_zu) { TEST_END TEST_BEGIN(test_fls_u_slow) { - TEST_FLS(unsigned, u_slow, u,"u"); + TEST_FLS(unsigned, u_slow, u, "u"); } TEST_END @@ -280,30 +285,11 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_pow2_ceil_u64, - test_pow2_ceil_u32, - test_pow2_ceil_zu, - test_lg_ceil_floor, - test_ffs_u, - test_ffs_lu, - test_ffs_llu, - test_ffs_u32, - test_ffs_u64, - test_ffs_zu, - test_fls_u, - test_fls_lu, - test_fls_llu, - test_fls_u32, - test_fls_u64, - test_fls_zu, - test_fls_u_slow, - test_fls_lu_slow, - test_fls_llu_slow, - test_popcount_u, - test_popcount_u_slow, - test_popcount_lu, - test_popcount_lu_slow, - test_popcount_llu, - test_popcount_llu_slow); + return test_no_reentrancy(test_pow2_ceil_u64, test_pow2_ceil_u32, + test_pow2_ceil_zu, test_lg_ceil_floor, test_ffs_u, test_ffs_lu, + test_ffs_llu, test_ffs_u32, test_ffs_u64, test_ffs_zu, test_fls_u, + test_fls_lu, test_fls_llu, test_fls_u32, 
test_fls_u64, test_fls_zu, + test_fls_u_slow, test_fls_lu_slow, test_fls_llu_slow, + test_popcount_u, test_popcount_u_slow, test_popcount_lu, + test_popcount_lu_slow, test_popcount_llu, test_popcount_llu_slow); } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 78e542b6..b3048cf3 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -9,14 +9,17 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); expect_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); #ifdef BITMAP_USE_TREE expect_u_eq(binfo->nlevels, binfo_dyn.nlevels, "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); { unsigned i; @@ -24,7 +27,8 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(binfo->levels[i].group_offset, binfo_dyn.levels[i].group_offset, "Unexpected difference between static and dynamic " - "initialization, nbits=%zu, level=%u", nbits, i); + "initialization, nbits=%zu, level=%u", + nbits, i); } } #else @@ -34,12 +38,12 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { } TEST_BEGIN(test_bitmap_initializer) { -#define NB(nbits) { \ - if (nbits <= BITMAP_MAXBITS) { \ - bitmap_info_t binfo = \ - BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_initializer_body(&binfo, nbits); \ - } \ +#define NB(nbits) \ + { \ + if (nbits <= BITMAP_MAXBITS) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_initializer_body(&binfo, nbits); \ + } \ } NBITS_TAB #undef NB @@ -47,11 +51,11 @@ TEST_BEGIN(test_bitmap_initializer) { TEST_END static size_t -test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, - size_t prev_size) { 
+test_bitmap_size_body( + const bitmap_info_t *binfo, size_t nbits, size_t prev_size) { size_t size = bitmap_size(binfo); - expect_zu_ge(size, (nbits >> 3), - "Bitmap size is smaller than expected"); + expect_zu_ge( + size, (nbits >> 3), "Bitmap size is smaller than expected"); expect_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); return size; } @@ -65,10 +69,10 @@ TEST_BEGIN(test_bitmap_size) { bitmap_info_init(&binfo, nbits); prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - prev_size = test_bitmap_size_body(&binfo, nbits, \ - prev_size); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); \ } prev_size = 0; NBITS_TAB @@ -78,14 +82,14 @@ TEST_END static void test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { - expect_false(bitmap_get(bitmap, binfo, i), - "Bit should be unset"); + expect_false( + bitmap_get(bitmap, binfo, i), "Bit should be unset"); } bitmap_init(bitmap, binfo, true); @@ -104,9 +108,10 @@ TEST_BEGIN(test_bitmap_init) { bitmap_info_init(&binfo, nbits); test_bitmap_init_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_init_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_init_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -115,7 +120,7 @@ TEST_END static void test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, 
binfo, false); @@ -135,9 +140,10 @@ TEST_BEGIN(test_bitmap_set) { bitmap_info_init(&binfo, nbits); test_bitmap_set_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_set_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_set_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -146,7 +152,7 @@ TEST_END static void test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); @@ -173,9 +179,10 @@ TEST_BEGIN(test_bitmap_unset) { bitmap_info_init(&binfo, nbits); test_bitmap_unset_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_unset_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_unset_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -193,7 +200,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after previous first unset " "bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i, "First unset bit should be just after previous first unset " "bit"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, @@ -213,7 +220,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_unset(bitmap, binfo, i); expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should the bit previously unset"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? 
i - 1 : i), i, "First unset bit should the bit previously unset"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should the bit previously unset"); @@ -232,7 +239,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after the bit previously " "set"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i, "First unset bit should be just after the bit previously " "set"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, @@ -245,7 +252,8 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { } expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, "First unset bit should be the last bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1), + expect_zu_eq( + bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits - 2 : nbits - 1), nbits - 1, "First unset bit should be the last bit"); expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, "First unset bit should be the last bit"); @@ -258,26 +266,26 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { * bitmap_ffu() finds the correct bit for all five min_bit cases. 
*/ if (nbits >= 3) { - for (size_t i = 0; i < nbits-2; i++) { + for (size_t i = 0; i < nbits - 2; i++) { bitmap_unset(bitmap, binfo, i); - bitmap_unset(bitmap, binfo, i+2); + bitmap_unset(bitmap, binfo, i + 2); if (i > 0) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, - "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1), + i, "Unexpected first unset bit"); } expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1), i + 2, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 2), i + 2, "Unexpected first unset bit"); if (i + 3 < nbits) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+3), + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 3), nbits, "Unexpected first unset bit"); } expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_sfu(bitmap, binfo), i+2, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i + 2, "Unexpected first unset bit"); } } @@ -288,24 +296,24 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { * cases. 
*/ if (nbits >= 3) { - bitmap_unset(bitmap, binfo, nbits-1); - for (size_t i = 0; i < nbits-1; i++) { + bitmap_unset(bitmap, binfo, nbits - 1); + for (size_t i = 0; i < nbits - 1; i++) { bitmap_unset(bitmap, binfo, i); if (i > 0) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, - "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1), + i, "Unexpected first unset bit"); } expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, - "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), - nbits-1, "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1), + nbits - 1, "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), + nbits - 1, "Unexpected first unset bit"); expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); } - expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "Unexpected first unset bit"); } @@ -322,9 +330,10 @@ TEST_BEGIN(test_bitmap_xfu) { bitmap_info_init(&binfo, nbits); test_bitmap_xfu_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_xfu_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_xfu_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -333,11 +342,6 @@ TEST_END int main(void) { - return test( - test_bitmap_initializer, - test_bitmap_size, - test_bitmap_init, - test_bitmap_set, - test_bitmap_unset, - test_bitmap_xfu); + return test(test_bitmap_initializer, test_bitmap_size, test_bitmap_init, + test_bitmap_set, test_bitmap_unset, test_bitmap_xfu); } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index d5e63a0e..643e430c 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -5,24 +5,24 @@ #define TEST_BUF_SIZE 16 #define UNIT_MAX 
(TEST_BUF_SIZE * 3) -static size_t test_write_len; -static char test_buf[TEST_BUF_SIZE]; +static size_t test_write_len; +static char test_buf[TEST_BUF_SIZE]; static uint64_t arg; static uint64_t arg_store; static void test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; - test_write_len += strlen(s); /* only increase the length */ + test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - assert_zu_le(prev_test_write_len, test_write_len, - "Test write overflowed"); + assert_zu_le( + prev_test_write_len, test_write_len, "Test write overflowed"); } static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { - char s[UNIT_MAX + 1]; - size_t n_unit, remain, i; + char s[UNIT_MAX + 1]; + size_t n_unit, remain, i; ssize_t unit; assert(buf_writer->buf != NULL); @@ -41,7 +41,8 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. 
*/ - assert_u64_eq(arg_store, arg, "Call " + assert_u64_eq(arg_store, arg, + "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -51,12 +52,14 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" - " of length %zu", i, unit); + " of length %zu", + i, unit); } buf_writer_flush(buf_writer); expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" - " writing %zu strings of length %zu", n_unit, unit); + " writing %zu strings of length %zu", + n_unit, unit); } } buf_writer_terminate(tsdn, buf_writer); @@ -64,9 +67,9 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - test_buf, TEST_BUF_SIZE), + test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); } @@ -74,22 +77,24 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); + NULL, TEST_BUF_SIZE), + "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + NULL, SC_LARGE_MAXCLASS + 1), + "buf_writer_init() should OOM"); assert(buf_writer.buf == NULL); - char s[UNIT_MAX + 1]; - size_t n_unit, i; + char s[UNIT_MAX + 1]; + size_t n_unit, i; ssize_t unit; memset(s, 'a', 
UNIT_MAX); @@ -107,20 +112,22 @@ TEST_BEGIN(test_buf_write_oom) { "Call back argument didn't get through"); assert_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" - " of length %zu", i, unit); + " of length %zu", + i, unit); } buf_writer_flush(&buf_writer); expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" - " writing %zu strings of length %zu", n_unit, unit); + " writing %zu strings of length %zu", + n_unit, unit); } } buf_writer_terminate(tsdn, &buf_writer); } TEST_END -static int test_read_count; -static size_t test_read_len; +static int test_read_count; +static size_t test_read_len; static uint64_t arg_sum; ssize_t @@ -142,8 +149,8 @@ test_read_cb(void *cbopaque, void *buf, size_t limit) { memset(buf, 'a', read_len); size_t prev_test_read_len = test_read_len; test_read_len += read_len; - assert_zu_le(prev_test_read_len, test_read_len, - "Test read overflowed"); + assert_zu_le( + prev_test_read_len, test_read_len, "Test read overflowed"); return read_len; } } @@ -168,9 +175,9 @@ test_buf_writer_pipe_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_pipe) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - test_buf, TEST_BUF_SIZE), + test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_pipe_body(tsdn, &buf_writer); } @@ -178,19 +185,16 @@ TEST_END TEST_BEGIN(test_buf_write_pipe_oom) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + NULL, SC_LARGE_MAXCLASS + 1), + "buf_writer_init() should OOM"); test_buf_writer_pipe_body(tsdn, &buf_writer); } TEST_END int main(void) { - return test( - test_buf_write_static, - test_buf_write_dynamic, - 
test_buf_write_oom, - test_buf_write_pipe, - test_buf_write_pipe_oom); + return test(test_buf_write_static, test_buf_write_dynamic, + test_buf_write_oom, test_buf_write_pipe, test_buf_write_pipe_oom); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 1bb750d7..dc1dbe36 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -3,7 +3,7 @@ static void do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) { - bool success; + bool success; void *ptr; assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); @@ -12,17 +12,16 @@ do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, - ""); + expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, ""); cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { ptr = cache_bin_alloc(bin, &success); expect_true(success, ""); - expect_ptr_eq(ptr, (void *)&ptrs[i], - "Should pop in order filled"); - expect_true(cache_bin_low_water_get(bin) - == nfill_succeed - i - 1, ""); + expect_ptr_eq( + ptr, (void *)&ptrs[i], "Should pop in order filled"); + expect_true( + cache_bin_low_water_get(bin) == nfill_succeed - i - 1, ""); } expect_true(cache_bin_ncached_get_local(bin) == 0, ""); expect_true(cache_bin_low_water_get(bin) == 0, ""); @@ -46,16 +45,15 @@ do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, } cache_bin_finish_flush(bin, &arr, nflush); - expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, - ""); + expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, ""); while (cache_bin_ncached_get_local(bin) > 0) { cache_bin_alloc(bin, &success); } } static void -do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, - size_t batch) { 
+do_batch_alloc_test( + cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, size_t batch) { assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); cache_bin_init_ptr_array_for_fill(bin, &arr, nfill); @@ -72,8 +70,8 @@ do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { expect_ptr_eq(out[i], &ptrs[i], ""); } - expect_true(cache_bin_low_water_get(bin) == nfill - - (cache_bin_sz_t)n, ""); + expect_true( + cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, ""); while (cache_bin_ncached_get_local(bin) > 0) { bool success; cache_bin_alloc(bin, &success); @@ -98,8 +96,8 @@ test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { TEST_BEGIN(test_cache_bin) { const int ncached_max = 100; - bool success; - void *ptr; + bool success; + void *ptr; cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); @@ -125,7 +123,7 @@ TEST_BEGIN(test_cache_bin) { */ void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); - for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_ncached_get_local(&bin) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, @@ -133,18 +131,17 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_low_water_get(&bin) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, - ""); + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i, ""); 
+ expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low * water mark. @@ -152,20 +149,21 @@ TEST_BEGIN(test_cache_bin) { ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i, ""); + expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* This should succeed, though. */ ptr = cache_bin_alloc(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i - 1, ""); + expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i - 1, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i - 1, + ""); } /* Now we're empty -- all alloc attempts should fail. */ expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); @@ -184,8 +182,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, - ""); + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* * Size is bigger than low water -- the reduced version should @@ -208,20 +205,16 @@ TEST_BEGIN(test_cache_bin) { /* Test fill. */ /* Try to fill all, succeed fully. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max, - ncached_max); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max); /* Try to fill all, succeed partially. 
*/ - do_fill_test(&bin, ptrs, ncached_max, ncached_max, - ncached_max / 2); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ do_fill_test(&bin, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, - ncached_max / 2); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, - ncached_max / 4); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, 0); @@ -262,11 +255,10 @@ TEST_END static void do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nstash) { - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Bin not empty"); - expect_true(cache_bin_nstashed_get_local(bin) == 0, - "Bin not empty"); - expect_true(nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); + expect_true(cache_bin_ncached_get_local(bin) == 0, "Bin not empty"); + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Bin not empty"); + expect_true( + nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); bool ret; /* Fill */ @@ -274,16 +266,16 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, ret = cache_bin_dalloc_easy(bin, &ptrs[i]); expect_true(ret, "Unexpected fill failure"); } - expect_true(cache_bin_ncached_get_local(bin) == nfill, - "Wrong cached count"); + expect_true( + cache_bin_ncached_get_local(bin) == nfill, "Wrong cached count"); /* Stash */ for (cache_bin_sz_t i = 0; i < nstash; i++) { ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get_local(bin) == nstash, - "Wrong stashed count"); + expect_true( + cache_bin_nstashed_get_local(bin) == nstash, "Wrong 
stashed count"); if (nfill + nstash == bin->bin_info.ncached_max) { ret = cache_bin_dalloc_easy(bin, &ptrs[0]); @@ -300,20 +292,20 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], "Should not alloc stashed ptrs"); } - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin) == nstash, - "Wrong stashed count"); + expect_true( + cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); + expect_true( + cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc stashed"); /* Clear stashed ones */ cache_bin_finish_flush_stashed(bin); - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin) == 0, - "Wrong stashed count"); + expect_true( + cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); + expect_true( + cache_bin_nstashed_get_local(bin) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc from empty bin"); @@ -322,7 +314,7 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, TEST_BEGIN(test_cache_bin_stash) { const int ncached_max = 100; - cache_bin_t bin; + cache_bin_t bin; cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); @@ -335,15 +327,17 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin) == - (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin) == - i / 2, "Wrong nstashed value"); + expect_true( + cache_bin_ncached_get_local(&bin) == (i / 2 + i % 2), + "Wrong ncached value"); + expect_true(cache_bin_nstashed_get_local(&bin) == i / 2, + "Wrong nstashed value"); if (i % 
2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } else { ret = cache_bin_stash(&bin, &ptrs[i]); - expect_true(ret, "Should be able to stash into a " + expect_true(ret, + "Should be able to stash into a " "non-full cache bin"); } } @@ -360,7 +354,8 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin) == ncached_max / 2, + expect_true(cache_bin_nstashed_get_local(&bin) + == ncached_max / 2, "Wrong nstashed value"); } } @@ -368,19 +363,14 @@ TEST_BEGIN(test_cache_bin_stash) { test_bin_init(&bin, &info); do_flush_stashed_test(&bin, ptrs, ncached_max, 0); do_flush_stashed_test(&bin, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, ptrs, ncached_max / 2, - ncached_max / 2); - do_flush_stashed_test(&bin, ptrs, ncached_max / 4, - ncached_max / 2); - do_flush_stashed_test(&bin, ptrs, ncached_max / 2, - ncached_max / 4); - do_flush_stashed_test(&bin, ptrs, ncached_max / 4, - ncached_max / 4); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 2); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 4); } TEST_END int main(void) { - return test(test_cache_bin, - test_cache_bin_stash); + return test(test_cache_bin, test_cache_bin_stash); } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 36142acd..f07892ac 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,55 +2,51 @@ TEST_BEGIN(test_new_delete) { tsd_t *tsd; - ckh_t ckh; + ckh_t ckh; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); - expect_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, - 
ckh_pointer_keycomp), "Unexpected ckh_new() error"); + expect_false( + ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsd_t *tsd; - ckh_t ckh; - const char *strs[] = { - "a string", - "A string", - "a string.", - "A string." - }; + tsd_t *tsd; + ckh_t ckh; + const char *strs[] = {"a string", "A string", "a string.", "A string."}; const char *missing = "A string not in the hash table."; - size_t i; + size_t i; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { ckh_insert(tsd, &ckh, strs[i], strs[i]); - expect_zu_eq(ckh_count(&ckh), i+1, - "ckh_count() should return %zu, but it returned %zu", i+1, + expect_zu_eq(ckh_count(&ckh), i + 1, + "ckh_count() should return %zu, but it returned %zu", i + 1, ckh_count(&ckh)); } /* Search. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { union { - void *p; + void *p; const char *s; } k, v; - void **kp, **vp; + void **kp, **vp; const char *ks, *vs; kp = (i & 1) ? &k.p : NULL; @@ -62,21 +58,21 @@ TEST_BEGIN(test_count_insert_search_remove) { ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? 
strs[i] : (const char *)NULL; - expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", - i); - expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", - i); + expect_ptr_eq( + (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); + expect_ptr_eq( + (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); } expect_true(ckh_search(&ckh, missing, NULL, NULL), "Unexpected ckh_search() success"); /* Remove. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { union { - void *p; + void *p; const char *s; } k, v; - void **kp, **vp; + void **kp, **vp; const char *ks, *vs; kp = (i & 1) ? &k.p : NULL; @@ -88,14 +84,14 @@ TEST_BEGIN(test_count_insert_search_remove) { ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? strs[i] : (const char *)NULL; - expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", - i); - expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", - i); + expect_ptr_eq( + (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); + expect_ptr_eq( + (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); expect_zu_eq(ckh_count(&ckh), - sizeof(strs)/sizeof(const char *) - i - 1, + sizeof(strs) / sizeof(const char *) - i - 1, "ckh_count() should return %zu, but it returned %zu", - sizeof(strs)/sizeof(const char *) - i - 1, + sizeof(strs) / sizeof(const char *) - i - 1, ckh_count(&ckh)); } @@ -106,18 +102,19 @@ TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) tsd_t *tsd; - ckh_t ckh; + ckh_t ckh; void **p[NITEMS]; - void *q, *r; + void *q, *r; size_t i; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, - ckh_pointer_keycomp), "Unexpected ckh_new() error"); + expect_false( + ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { - p[i] = mallocx(i+1, 0); + p[i] = mallocx(i + 1, 0); expect_ptr_not_null(p[i], "Unexpected mallocx() failure"); } @@ -151,7 
+148,7 @@ TEST_BEGIN(test_insert_iter_remove) { } { - bool seen[NITEMS]; + bool seen[NITEMS]; size_t tabind; memset(seen, 0, sizeof(seen)); @@ -195,8 +192,8 @@ TEST_BEGIN(test_insert_iter_remove) { } expect_zu_eq(ckh_count(&ckh), 0, - "ckh_count() should return %zu, but it returned %zu", - ZU(0), ckh_count(&ckh)); + "ckh_count() should return %zu, but it returned %zu", ZU(0), + ckh_count(&ckh)); ckh_delete(tsd, &ckh); #undef NITEMS } @@ -204,8 +201,6 @@ TEST_END int main(void) { - return test( - test_new_delete, - test_count_insert_search_remove, + return test(test_new_delete, test_count_insert_search_remove, test_insert_iter_remove); } diff --git a/test/unit/counter.c b/test/unit/counter.c index 277baac1..04100daa 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -11,7 +11,7 @@ TEST_BEGIN(test_counter_accum) { counter_accum_init(&c, interval); tsd_t *tsd = tsd_fetch(); - bool trigger; + bool trigger; for (unsigned i = 0; i < n; i++) { trigger = counter_accum(tsd_tsdn(tsd), &c, increment); accum += increment; @@ -39,8 +39,8 @@ static void * thd_start(void *varg) { counter_accum_t *c = (counter_accum_t *)varg; - tsd_t *tsd = tsd_fetch(); - bool trigger; + tsd_t *tsd = tsd_fetch(); + bool trigger; uintptr_t n_triggered = 0; for (unsigned i = 0; i < N_ITER_THD; i++) { trigger = counter_accum(tsd_tsdn(tsd), c, ITER_INCREMENT); @@ -50,12 +50,11 @@ thd_start(void *varg) { return (void *)n_triggered; } - TEST_BEGIN(test_counter_mt) { counter_accum_t shared_c; counter_accum_init(&shared_c, interval); - thd_t thds[N_THDS]; + thd_t thds[N_THDS]; unsigned i; for (i = 0; i < N_THDS; i++) { thd_create(&thds[i], thd_start, (void *)&shared_c); @@ -74,7 +73,5 @@ TEST_END int main(void) { - return test( - test_counter_accum, - test_counter_mt); + return test(test_counter_accum, test_counter_mt); } diff --git a/test/unit/decay.c b/test/unit/decay.c index bdb6d0a3..10740a85 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -22,12 +22,11 @@ 
TEST_BEGIN(test_decay_init) { TEST_END TEST_BEGIN(test_decay_ms_valid) { - expect_false(decay_ms_valid(-7), - "Misclassified negative decay as valid"); + expect_false( + decay_ms_valid(-7), "Misclassified negative decay as valid"); expect_true(decay_ms_valid(-1), "Misclassified -1 (never decay) as invalid decay"); - expect_true(decay_ms_valid(8943), - "Misclassified valid decay"); + expect_true(decay_ms_valid(8943), "Misclassified valid decay"); if (SSIZE_MAX > NSTIME_SEC_MAX) { expect_false( decay_ms_valid((ssize_t)(NSTIME_SEC_MAX * KQU(1000) + 39)), @@ -111,12 +110,12 @@ TEST_BEGIN(test_decay_empty) { assert_false(err, ""); uint64_t time_between_calls = decay_epoch_duration_ns(&decay) / 5; - int nepochs = 0; + int nepochs = 0; for (uint64_t i = 0; i < decay_ns / time_between_calls * 10; i++) { size_t dirty_pages = 0; nstime_init(&curtime, i * time_between_calls); - bool epoch_advanced = decay_maybe_advance_epoch(&decay, - &curtime, dirty_pages); + bool epoch_advanced = decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); if (epoch_advanced) { nepochs++; expect_zu_eq(decay_npages_limit_get(&decay), 0, @@ -158,30 +157,32 @@ TEST_BEGIN(test_decay) { nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); const size_t dirty_pages_per_epoch = 1000; - size_t dirty_pages = 0; - uint64_t epoch_ns = decay_epoch_duration_ns(&decay); - bool epoch_advanced = false; + size_t dirty_pages = 0; + uint64_t epoch_ns = decay_epoch_duration_ns(&decay); + bool epoch_advanced = false; /* Populate backlog with some dirty pages */ for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); dirty_pages += dirty_pages_per_epoch; - epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); size_t npages_limit = decay_npages_limit_get(&decay); - expect_zu_gt(npages_limit, 0, "npages_limit is incorrectly equal 
" + expect_zu_gt(npages_limit, 0, + "npages_limit is incorrectly equal " "to zero after dirty pages have been added"); /* Keep dirty pages unchanged and verify that npages_limit decreases */ for (uint64_t i = nepoch_init; i * epoch_ns < decay_ns; ++i) { nstime_add(&curtime, &epochtime); - epoch_advanced = decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced = decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); if (epoch_advanced) { - size_t npages_limit_new = decay_npages_limit_get(&decay); + size_t npages_limit_new = decay_npages_limit_get( + &decay); expect_zu_lt(npages_limit_new, npages_limit, "napges_limit failed to decay"); @@ -189,20 +190,22 @@ TEST_BEGIN(test_decay) { } } - expect_zu_gt(npages_limit, 0, "npages_limit decayed to zero earlier " + expect_zu_gt(npages_limit, 0, + "npages_limit decayed to zero earlier " "than decay_ms since last dirty page was added"); /* Completely push all dirty pages out of the backlog */ epoch_advanced = false; for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); - epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); npages_limit = decay_npages_limit_get(&decay); - expect_zu_eq(npages_limit, 0, "npages_limit didn't decay to 0 after " + expect_zu_eq(npages_limit, 0, + "npages_limit didn't decay to 0 after " "decay_ms since last bump in dirty pages"); } TEST_END @@ -230,29 +233,29 @@ TEST_BEGIN(test_decay_ns_until_purge) { "Failed to return unbounded wait time for zero threshold"); const size_t dirty_pages_per_epoch = 1000; - size_t dirty_pages = 0; - bool epoch_advanced = false; + size_t dirty_pages = 0; + bool epoch_advanced = false; for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); dirty_pages += dirty_pages_per_epoch; - epoch_advanced |= decay_maybe_advance_epoch(&decay, 
&curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); - uint64_t ns_until_purge_all = decay_ns_until_purge(&decay, - dirty_pages, dirty_pages); + uint64_t ns_until_purge_all = decay_ns_until_purge( + &decay, dirty_pages, dirty_pages); expect_u64_ge(ns_until_purge_all, decay_ns, "Incorrectly calculated time to purge all pages"); - uint64_t ns_until_purge_none = decay_ns_until_purge(&decay, - dirty_pages, 0); + uint64_t ns_until_purge_none = decay_ns_until_purge( + &decay, dirty_pages, 0); expect_u64_eq(ns_until_purge_none, decay_epoch_duration_ns(&decay) * 2, "Incorrectly calculated time to purge 0 pages"); uint64_t npages_threshold = dirty_pages / 2; - uint64_t ns_until_purge_half = decay_ns_until_purge(&decay, - dirty_pages, npages_threshold); + uint64_t ns_until_purge_half = decay_ns_until_purge( + &decay, dirty_pages, npages_threshold); nstime_t waittime; nstime_init(&waittime, ns_until_purge_half); @@ -263,7 +266,7 @@ TEST_BEGIN(test_decay_ns_until_purge) { expect_zu_lt(npages_limit, dirty_pages, "npages_limit failed to decrease after waiting"); size_t expected = dirty_pages - npages_limit; - int deviation = abs((int)expected - (int)(npages_threshold)); + int deviation = abs((int)expected - (int)(npages_threshold)); expect_d_lt(deviation, (int)(npages_threshold / 2), "After waiting, number of pages is out of the expected interval " "[0.5 * npages_threshold .. 
1.5 * npages_threshold]"); @@ -272,12 +275,7 @@ TEST_END int main(void) { - return test( - test_decay_init, - test_decay_ms_valid, - test_decay_npages_purge_in, - test_decay_maybe_advance_epoch, - test_decay_empty, - test_decay, - test_decay_ns_until_purge); + return test(test_decay_init, test_decay_ms_valid, + test_decay_npages_purge_in, test_decay_maybe_advance_epoch, + test_decay_empty, test_decay, test_decay_ns_until_purge); } diff --git a/test/unit/div.c b/test/unit/div.c index 29aea665..53447f4a 100644 --- a/test/unit/div.c +++ b/test/unit/div.c @@ -11,12 +11,12 @@ TEST_BEGIN(test_div_exhaustive) { max = 1000 * 1000; } for (size_t dividend = 0; dividend < 1000 * divisor; - dividend += divisor) { - size_t quotient = div_compute( - &div_info, dividend); + dividend += divisor) { + size_t quotient = div_compute(&div_info, dividend); expect_zu_eq(dividend, quotient * divisor, "With divisor = %zu, dividend = %zu, " - "got quotient %zu", divisor, dividend, quotient); + "got quotient %zu", + divisor, dividend, quotient); } } } @@ -24,6 +24,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_div_exhaustive); + return test_no_reentrancy(test_div_exhaustive); } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index b6ae8f75..4bd6ab73 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/safety_check.h" bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -23,10 +24,9 @@ test_double_free_post(void) { static bool tcache_enabled(void) { - bool enabled; + bool enabled; size_t sz = sizeof(enabled); - assert_d_eq( - mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, + assert_d_eq(mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return enabled; } @@ -41,7 +41,7 @@ TEST_BEGIN(test_large_double_free_tcache) { test_double_free_pre(); char *ptr = 
malloc(SC_LARGE_MINCLASS); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { free(ptr); @@ -64,7 +64,7 @@ TEST_BEGIN(test_large_double_free_no_tcache) { test_double_free_pre(); char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); dallocx(ptr, MALLOCX_TCACHE_NONE); if (!guarded) { dallocx(ptr, MALLOCX_TCACHE_NONE); @@ -87,7 +87,7 @@ TEST_BEGIN(test_small_double_free_tcache) { test_double_free_pre(); char *ptr = malloc(1); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { free(ptr); @@ -115,7 +115,7 @@ TEST_BEGIN(test_small_double_free_arena) { */ char *ptr1 = malloc(1); char *ptr = malloc(1); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); @@ -135,9 +135,7 @@ TEST_END int main(void) { - return test( - test_large_double_free_no_tcache, - test_large_double_free_tcache, - test_small_double_free_tcache, + return test(test_large_double_free_no_tcache, + test_large_double_free_tcache, test_small_double_free_tcache, test_small_double_free_arena); } diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index af1110a9..16ed58b2 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -49,16 +49,16 @@ TEST_END static size_t ecf_count(edata_cache_fast_t *ecf) { - size_t count = 0; + size_t count = 0; edata_t *cur; - ql_foreach(cur, &ecf->list.head, ql_link_inactive) { + ql_foreach (cur, &ecf->list.head, ql_link_inactive) { count++; } return count; } TEST_BEGIN(test_edata_cache_fast_simple) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -96,7 +96,7 @@ 
TEST_BEGIN(test_edata_cache_fast_simple) { TEST_END TEST_BEGIN(test_edata_cache_fill) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -179,7 +179,7 @@ TEST_BEGIN(test_edata_cache_fill) { TEST_END TEST_BEGIN(test_edata_cache_disable) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -198,7 +198,8 @@ TEST_BEGIN(test_edata_cache_disable) { expect_zu_eq(0, ecf_count(&ecf), ""); expect_zu_eq(EDATA_CACHE_FAST_FILL, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); + atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabling should flush"); edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); expect_zu_eq(0, ecf_count(&ecf), ""); @@ -218,9 +219,6 @@ TEST_END int main(void) { - return test( - test_edata_cache, - test_edata_cache_fast_simple, - test_edata_cache_fill, - test_edata_cache_disable); + return test(test_edata_cache, test_edata_cache_fast_simple, + test_edata_cache_fill, test_edata_cache_disable); } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index af0da90d..dc53b9eb 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -12,9 +12,9 @@ static bool print_escaped = false; typedef struct buf_descriptor_s buf_descriptor_t; struct buf_descriptor_s { - char *buf; + char *buf; size_t len; - bool mid_quote; + bool mid_quote; }; /* @@ -56,8 +56,8 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { } } - size_t written = malloc_snprintf(buf_descriptor->buf, - buf_descriptor->len, "%s", str); + size_t written = malloc_snprintf( + buf_descriptor->buf, buf_descriptor->len, "%s", str); expect_zu_eq(written, strlen(str), "Buffer overflow!"); buf_descriptor->buf += written; buf_descriptor->len -= written; @@ -66,19 +66,18 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { static void expect_emit_output(void (*emit_fn)(emitter_t *), - const char *expected_json_output, - const char *expected_json_compact_output, + const char 
*expected_json_output, const char *expected_json_compact_output, const char *expected_table_output) { - emitter_t emitter; - char buf[MALLOC_PRINTF_BUFSIZE]; + emitter_t emitter; + char buf[MALLOC_PRINTF_BUFSIZE]; buf_descriptor_t buf_descriptor; buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; - emitter_init(&emitter, emitter_output_json, &forwarding_cb, - &buf_descriptor); + emitter_init( + &emitter, emitter_output_json, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); expect_str_eq(expected_json_output, buf, "json output failure"); @@ -89,24 +88,24 @@ expect_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - expect_str_eq(expected_json_compact_output, buf, - "compact json output failure"); + expect_str_eq( + expected_json_compact_output, buf, "compact json output failure"); buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; - emitter_init(&emitter, emitter_output_table, &forwarding_cb, - &buf_descriptor); + emitter_init( + &emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); expect_str_eq(expected_table_output, buf, "table output failure"); } static void emit_dict(emitter_t *emitter) { - bool b_false = false; - bool b_true = true; - int i_123 = 123; + bool b_false = false; + bool b_true = true; + int i_123 = 123; const char *str = "a string"; emitter_begin(emitter); @@ -122,48 +121,49 @@ emit_dict(emitter_t *emitter) { } static const char *dict_json = -"{\n" -"\t\"foo\": {\n" -"\t\t\"abc\": false,\n" -"\t\t\"def\": true,\n" -"\t\t\"ghi\": 123,\n" -"\t\t\"jkl\": \"a string\"\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"foo\": {\n" + "\t\t\"abc\": false,\n" + "\t\t\"def\": true,\n" + "\t\t\"ghi\": 123,\n" + "\t\t\"jkl\": \"a string\"\n" + "\t}\n" + "}\n"; static const char *dict_json_compact = -"{" - "\"foo\":{" - 
"\"abc\":false," - "\"def\":true," - "\"ghi\":123," - "\"jkl\":\"a string\"" - "}" -"}"; + "{" + "\"foo\":{" + "\"abc\":false," + "\"def\":true," + "\"ghi\":123," + "\"jkl\":\"a string\"" + "}" + "}"; static const char *dict_table = -"This is the foo table:\n" -" ABC: false\n" -" DEF: true\n" -" GHI: 123 (note_key1: \"a string\")\n" -" JKL: \"a string\" (note_key2: false)\n"; + "This is the foo table:\n" + " ABC: false\n" + " DEF: true\n" + " GHI: 123 (note_key1: \"a string\")\n" + " JKL: \"a string\" (note_key2: false)\n"; static void emit_table_printf(emitter_t *emitter) { emitter_begin(emitter); emitter_table_printf(emitter, "Table note 1\n"); - emitter_table_printf(emitter, "Table note 2 %s\n", - "with format string"); + emitter_table_printf( + emitter, "Table note 2 %s\n", "with format string"); emitter_end(emitter); } static const char *table_printf_json = -"{\n" -"}\n"; + "{\n" + "}\n"; static const char *table_printf_json_compact = "{}"; static const char *table_printf_table = -"Table note 1\n" -"Table note 2 with format string\n"; + "Table note 1\n" + "Table note 2 with format string\n"; -static void emit_nested_dict(emitter_t *emitter) { +static void +emit_nested_dict(emitter_t *emitter) { int val = 123; emitter_begin(emitter); emitter_dict_begin(emitter, "json1", "Dict 1"); @@ -174,53 +174,53 @@ static void emit_nested_dict(emitter_t *emitter) { emitter_dict_end(emitter); /* Close 3 */ emitter_dict_end(emitter); /* Close 1 */ emitter_dict_begin(emitter, "json4", "Dict 4"); - emitter_kv(emitter, "primitive", "Another primitive", - emitter_type_int, &val); + emitter_kv( + emitter, "primitive", "Another primitive", emitter_type_int, &val); emitter_dict_end(emitter); /* Close 4 */ emitter_end(emitter); } static const char *nested_dict_json = -"{\n" -"\t\"json1\": {\n" -"\t\t\"json2\": {\n" -"\t\t\t\"primitive\": 123\n" -"\t\t},\n" -"\t\t\"json3\": {\n" -"\t\t}\n" -"\t},\n" -"\t\"json4\": {\n" -"\t\t\"primitive\": 123\n" -"\t}\n" -"}\n"; + "{\n" + 
"\t\"json1\": {\n" + "\t\t\"json2\": {\n" + "\t\t\t\"primitive\": 123\n" + "\t\t},\n" + "\t\t\"json3\": {\n" + "\t\t}\n" + "\t},\n" + "\t\"json4\": {\n" + "\t\t\"primitive\": 123\n" + "\t}\n" + "}\n"; static const char *nested_dict_json_compact = -"{" - "\"json1\":{" - "\"json2\":{" - "\"primitive\":123" - "}," - "\"json3\":{" - "}" - "}," - "\"json4\":{" - "\"primitive\":123" - "}" -"}"; + "{" + "\"json1\":{" + "\"json2\":{" + "\"primitive\":123" + "}," + "\"json3\":{" + "}" + "}," + "\"json4\":{" + "\"primitive\":123" + "}" + "}"; static const char *nested_dict_table = -"Dict 1\n" -" Dict 2\n" -" A primitive: 123\n" -" Dict 3\n" -"Dict 4\n" -" Another primitive: 123\n"; + "Dict 1\n" + " Dict 2\n" + " A primitive: 123\n" + " Dict 3\n" + "Dict 4\n" + " Another primitive: 123\n"; static void emit_types(emitter_t *emitter) { - bool b = false; - int i = -123; - unsigned u = 123; - ssize_t zd = -456; - size_t zu = 456; + bool b = false; + int i = -123; + unsigned u = 123; + ssize_t zd = -456; + size_t zu = 456; const char *str = "string"; const char *long_str = "abcdefghijklmnopqrstuvwxyz " @@ -254,55 +254,55 @@ emit_types(emitter_t *emitter) { } static const char *types_json = -"{\n" -"\t\"k1\": false,\n" -"\t\"k2\": -123,\n" -"\t\"k3\": 123,\n" -"\t\"k4\": -456,\n" -"\t\"k5\": 456,\n" -"\t\"k6\": \"string\",\n" -"\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz\",\n" -"\t\"k8\": 789,\n" -"\t\"k9\": 10000000000\n" -"}\n"; + "{\n" + "\t\"k1\": false,\n" + "\t\"k2\": -123,\n" + "\t\"k3\": 123,\n" + "\t\"k4\": -456,\n" + "\t\"k5\": 456,\n" + "\t\"k6\": \"string\",\n" + "\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + 
"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\",\n" + "\t\"k8\": 789,\n" + "\t\"k9\": 10000000000\n" + "}\n"; static const char *types_json_compact = -"{" - "\"k1\":false," - "\"k2\":-123," - "\"k3\":123," - "\"k4\":-456," - "\"k5\":456," - "\"k6\":\"string\"," - "\"k7\":\"abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz\"," - "\"k8\":789," - "\"k9\":10000000000" -"}"; + "{" + "\"k1\":false," + "\"k2\":-123," + "\"k3\":123," + "\"k4\":-456," + "\"k5\":456," + "\"k6\":\"string\"," + "\"k7\":\"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"," + "\"k8\":789," + "\"k9\":10000000000" + "}"; static const char *types_table = -"K1: false\n" -"K2: -123\n" -"K3: 123\n" -"K4: -456\n" -"K5: 456\n" -"K6: \"string\"\n" -"K7: \"abcdefghijklmnopqrstuvwxyz " + "K1: false\n" + "K2: -123\n" + "K3: 123\n" + "K4: -456\n" + "K5: 456\n" + "K6: \"string\"\n" + "K7: \"abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " @@ -312,8 +312,8 @@ static const char *types_table = "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz\"\n" -"K8: 789\n" -"K9: 10000000000\n"; + "K8: 789\n" + "K9: 10000000000\n"; static void emit_modal(emitter_t *emitter) { @@ -336,37 +336,37 @@ emit_modal(emitter_t *emitter) 
{ } const char *modal_json = -"{\n" -"\t\"j0\": {\n" -"\t\t\"j1\": {\n" -"\t\t\t\"i1\": 123,\n" -"\t\t\t\"i2\": 123,\n" -"\t\t\t\"i4\": 123\n" -"\t\t},\n" -"\t\t\"i5\": 123,\n" -"\t\t\"i6\": 123\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"j0\": {\n" + "\t\t\"j1\": {\n" + "\t\t\t\"i1\": 123,\n" + "\t\t\t\"i2\": 123,\n" + "\t\t\t\"i4\": 123\n" + "\t\t},\n" + "\t\t\"i5\": 123,\n" + "\t\t\"i6\": 123\n" + "\t}\n" + "}\n"; const char *modal_json_compact = -"{" - "\"j0\":{" - "\"j1\":{" - "\"i1\":123," - "\"i2\":123," - "\"i4\":123" - "}," - "\"i5\":123," - "\"i6\":123" - "}" -"}"; + "{" + "\"j0\":{" + "\"j1\":{" + "\"i1\":123," + "\"i2\":123," + "\"i4\":123" + "}," + "\"i5\":123," + "\"i6\":123" + "}" + "}"; const char *modal_table = -"T0\n" -" I1: 123\n" -" I3: 123\n" -" T1\n" -" I4: 123\n" -" I5: 123\n" -" I6: 123\n"; + "T0\n" + " I1: 123\n" + " I3: 123\n" + " T1\n" + " I4: 123\n" + " I5: 123\n" + " I6: 123\n"; static void emit_json_array(emitter_t *emitter) { @@ -387,121 +387,124 @@ emit_json_array(emitter_t *emitter) { emitter_json_kv(emitter, "bar", emitter_type_int, &ival); emitter_json_kv(emitter, "baz", emitter_type_int, &ival); emitter_json_object_end(emitter); /* Close arr[3]. */ - emitter_json_array_end(emitter); /* Close arr. */ + emitter_json_array_end(emitter); /* Close arr. */ emitter_json_object_end(emitter); /* Close dict. 
*/ emitter_end(emitter); } static const char *json_array_json = -"{\n" -"\t\"dict\": {\n" -"\t\t\"arr\": [\n" -"\t\t\t{\n" -"\t\t\t\t\"foo\": 123\n" -"\t\t\t},\n" -"\t\t\t123,\n" -"\t\t\t123,\n" -"\t\t\t{\n" -"\t\t\t\t\"bar\": 123,\n" -"\t\t\t\t\"baz\": 123\n" -"\t\t\t}\n" -"\t\t]\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"dict\": {\n" + "\t\t\"arr\": [\n" + "\t\t\t{\n" + "\t\t\t\t\"foo\": 123\n" + "\t\t\t},\n" + "\t\t\t123,\n" + "\t\t\t123,\n" + "\t\t\t{\n" + "\t\t\t\t\"bar\": 123,\n" + "\t\t\t\t\"baz\": 123\n" + "\t\t\t}\n" + "\t\t]\n" + "\t}\n" + "}\n"; static const char *json_array_json_compact = -"{" - "\"dict\":{" - "\"arr\":[" - "{" - "\"foo\":123" - "}," - "123," - "123," - "{" - "\"bar\":123," - "\"baz\":123" - "}" - "]" - "}" -"}"; + "{" + "\"dict\":{" + "\"arr\":[" + "{" + "\"foo\":123" + "}," + "123," + "123," + "{" + "\"bar\":123," + "\"baz\":123" + "}" + "]" + "}" + "}"; static const char *json_array_table = ""; static void emit_json_nested_array(emitter_t *emitter) { - int ival = 123; + int ival = 123; char *sval = "foo"; emitter_begin(emitter); emitter_json_array_begin(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, 
&ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_array_end(emitter); emitter_json_array_end(emitter); emitter_end(emitter); } static const char *json_nested_array_json = -"{\n" -"\t[\n" -"\t\t[\n" -"\t\t\t123,\n" -"\t\t\t\"foo\",\n" -"\t\t\t123,\n" -"\t\t\t\"foo\"\n" -"\t\t],\n" -"\t\t[\n" -"\t\t\t123\n" -"\t\t],\n" -"\t\t[\n" -"\t\t\t\"foo\",\n" -"\t\t\t123\n" -"\t\t],\n" -"\t\t[\n" -"\t\t]\n" -"\t]\n" -"}\n"; + "{\n" + "\t[\n" + "\t\t[\n" + "\t\t\t123,\n" + "\t\t\t\"foo\",\n" + "\t\t\t123,\n" + "\t\t\t\"foo\"\n" + "\t\t],\n" + "\t\t[\n" + "\t\t\t123\n" + "\t\t],\n" + "\t\t[\n" + "\t\t\t\"foo\",\n" + "\t\t\t123\n" + "\t\t],\n" + "\t\t[\n" + "\t\t]\n" + "\t]\n" + "}\n"; static const char *json_nested_array_json_compact = -"{" - "[" - "[" - "123," - "\"foo\"," - "123," - "\"foo\"" - "]," - "[" - "123" - "]," - "[" - "\"foo\"," - "123" - "]," - "[" - "]" - "]" -"}"; + "{" + "[" + "[" + "123," + "\"foo\"," + "123," + "\"foo\"" + "]," + "[" + "123" + "]," + "[" + "\"foo\"," + "123" + "]," + "[" + "]" + "]" + "}"; static const char *json_nested_array_table = ""; static void emit_table_row(emitter_t *emitter) { emitter_begin(emitter); emitter_row_t row; - emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}}; + emitter_col_t abc = { + emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}}; abc.str_val = "ABC title"; - emitter_col_t def = {emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}}; + emitter_col_t def = { + emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}}; def.str_val = "DEF title"; - emitter_col_t ghi 
= {emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}}; + emitter_col_t ghi = { + emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}}; ghi.str_val = "GHI"; emitter_row_init(&row); @@ -536,21 +539,21 @@ emit_table_row(emitter_t *emitter) { } static const char *table_row_json = -"{\n" -"}\n"; + "{\n" + "}\n"; static const char *table_row_json_compact = "{}"; static const char *table_row_table = -"ABC title DEF title GHI\n" -"123 true 456\n" -"789 false 1011\n" -"\"a string\" false ghi\n"; + "ABC title DEF title GHI\n" + "123 true 456\n" + "789 false 1011\n" + "\"a string\" false ghi\n"; -#define GENERATE_TEST(feature) \ -TEST_BEGIN(test_##feature) { \ - expect_emit_output(emit_##feature, feature##_json, \ - feature##_json_compact, feature##_table); \ -} \ -TEST_END +#define GENERATE_TEST(feature) \ + TEST_BEGIN(test_##feature) { \ + expect_emit_output(emit_##feature, feature##_json, \ + feature##_json_compact, feature##_table); \ + } \ + TEST_END GENERATE_TEST(dict) GENERATE_TEST(table_printf) @@ -563,13 +566,7 @@ GENERATE_TEST(table_row) int main(void) { - return test_no_reentrancy( - test_dict, - test_table_printf, - test_nested_dict, - test_types, - test_modal, - test_json_array, - test_json_nested_array, - test_table_row); + return test_no_reentrancy(test_dict, test_table_printf, + test_nested_dict, test_types, test_modal, test_json_array, + test_json_nested_array, test_table_row); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index e6bbd539..c178240e 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -2,9 +2,9 @@ TEST_BEGIN(test_small_extent_size) { unsigned nbins, i; - size_t sz, extent_size; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz, extent_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); /* * Iterate over all small size classes, get their extent sizes, and @@ -21,25 +21,26 @@ TEST_BEGIN(test_small_extent_size) { mib[2] = i; sz = 
sizeof(size_t); expect_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, - NULL, 0), 0, "Unexpected mallctlbymib failure"); - expect_zu_eq(extent_size, - sz_psz_quantize_floor(extent_size), + NULL, 0), + 0, "Unexpected mallctlbymib failure"); + expect_zu_eq(extent_size, sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " - "(extent_size=%zu)", extent_size); - expect_zu_eq(extent_size, - sz_psz_quantize_ceil(extent_size), + "(extent_size=%zu)", + extent_size); + expect_zu_eq(extent_size, sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " - "(extent_size=%zu)", extent_size); + "(extent_size=%zu)", + extent_size); } } TEST_END TEST_BEGIN(test_large_extent_size) { - bool cache_oblivious; + bool cache_oblivious; unsigned nlextents, i; - size_t sz, extent_size_prev, ceil_prev; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz, extent_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); /* * Iterate over all large size classes, get their extent sizes, and @@ -48,11 +49,13 @@ TEST_BEGIN(test_large_extent_size) { sz = sizeof(bool); expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); @@ -62,20 +65,21 @@ TEST_BEGIN(test_large_extent_size) { mib[2] = i; sz = sizeof(size_t); expect_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, - &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - extent_size = cache_oblivious ? 
lextent_size + PAGE : - lextent_size; + &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + extent_size = cache_oblivious ? lextent_size + PAGE + : lextent_size; floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); expect_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " - "(lextent_size=%zu, extent_size=%zu)", lextent_size, - extent_size); + "(lextent_size=%zu, extent_size=%zu)", + lextent_size, extent_size); expect_zu_eq(extent_size, ceil, "Extent quantization should be a no-op for precise size " - "(lextent_size=%zu, extent_size=%zu)", lextent_size, - extent_size); + "(lextent_size=%zu, extent_size=%zu)", + lextent_size, extent_size); if (i > 0) { expect_zu_eq(extent_size_prev, @@ -85,23 +89,22 @@ TEST_BEGIN(test_large_extent_size) { expect_zu_eq(ceil_prev, extent_size, "Ceiling should be a precise size " "(extent_size_prev=%zu, ceil_prev=%zu, " - "extent_size=%zu)", extent_size_prev, - ceil_prev, extent_size); + "extent_size=%zu)", + extent_size_prev, ceil_prev, extent_size); } } if (i + 1 < nlextents) { extent_size_prev = floor; - ceil_prev = sz_psz_quantize_ceil(extent_size + - PAGE); + ceil_prev = sz_psz_quantize_ceil(extent_size + PAGE); } } } TEST_END TEST_BEGIN(test_monotonic) { -#define SZ_MAX ZU(4 * 1024 * 1024) +#define SZ_MAX ZU(4 * 1024 * 1024) unsigned i; - size_t floor_prev, ceil_prev; + size_t floor_prev, ceil_prev; floor_prev = 0; ceil_prev = 0; @@ -117,12 +120,15 @@ TEST_BEGIN(test_monotonic) { floor, extent_size, ceil); expect_zu_ge(ceil, extent_size, "Ceiling should be >= (floor=%zu, extent_size=%zu, " - "ceil=%zu)", floor, extent_size, ceil); + "ceil=%zu)", + floor, extent_size, ceil); - expect_zu_le(floor_prev, floor, "Floor should be monotonic " + expect_zu_le(floor_prev, floor, + "Floor should be monotonic " "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", floor_prev, floor, extent_size, ceil); - expect_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + 
expect_zu_le(ceil_prev, ceil, + "Ceiling should be monotonic " "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", floor, extent_size, ceil_prev, ceil); @@ -135,7 +141,5 @@ TEST_END int main(void) { return test( - test_small_extent_size, - test_large_extent_size, - test_monotonic); + test_small_extent_size, test_large_extent_size, test_monotonic); } diff --git a/test/unit/fb.c b/test/unit/fb.c index ad72c75a..26a33fd9 100644 --- a/test/unit/fb.c +++ b/test/unit/fb.c @@ -5,21 +5,19 @@ static void do_test_init(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); /* Junk fb's contents. */ memset(fb, 99, sz); fb_init(fb, nbits); for (size_t i = 0; i < nbits; i++) { - expect_false(fb_get(fb, nbits, i), - "bitmap should start empty"); + expect_false(fb_get(fb, nbits, i), "bitmap should start empty"); } free(fb); } TEST_BEGIN(test_fb_init) { -#define NB(nbits) \ - do_test_init(nbits); +#define NB(nbits) do_test_init(nbits); NBITS_TAB #undef NB } @@ -27,7 +25,7 @@ TEST_END static void do_test_get_set_unset(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); /* Set the bits divisible by 3. */ @@ -56,8 +54,7 @@ do_test_get_set_unset(size_t nbits) { } TEST_BEGIN(test_get_set_unset) { -#define NB(nbits) \ - do_test_get_set_unset(nbits); +#define NB(nbits) do_test_get_set_unset(nbits); NBITS_TAB #undef NB } @@ -65,7 +62,7 @@ TEST_END static ssize_t find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { - for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { + for (; i < (ssize_t)nbits && i >= 0; i += (forward ? 
1 : -1)) { bool expected_bit = i % 3 == 0 || i % 5 == 0; if (expected_bit == bit) { return i; @@ -76,7 +73,7 @@ find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { static void do_test_search_simple(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -96,7 +93,7 @@ do_test_search_simple(size_t nbits) { expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); - size_t fls_search = fb_fls(fb, nbits, i); + size_t fls_search = fb_fls(fb, nbits, i); expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); size_t ffu_compute = find_3_5_compute(i, nbits, false, true); @@ -112,8 +109,7 @@ do_test_search_simple(size_t nbits) { } TEST_BEGIN(test_search_simple) { -#define NB(nbits) \ - do_test_search_simple(nbits); +#define NB(nbits) do_test_search_simple(nbits); NBITS_TAB #undef NB } @@ -145,15 +141,17 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), + expect_zu_eq(position + 1, + fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, + fb_flu(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, - position), "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, position), + "mismatch at 
%zu, %zu", position, special_bit); expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(position, fb_flu(mostly_full, nbits, position), @@ -162,8 +160,8 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, /* position > special_bit. */ expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), @@ -186,7 +184,7 @@ do_test_search_exhaustive(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *empty = malloc(sz); fb_init(empty, nbits); fb_group_t *full = malloc(sz); @@ -209,8 +207,7 @@ do_test_search_exhaustive(size_t nbits) { } TEST_BEGIN(test_search_exhaustive) { -#define NB(nbits) \ - do_test_search_exhaustive(nbits); +#define NB(nbits) do_test_search_exhaustive(nbits); NBITS_TAB #undef NB } @@ -222,8 +219,8 @@ TEST_BEGIN(test_range_simple) { * big enough that usages of things like weirdnum (below) near the * beginning fit comfortably into the beginning of the bitmap. 
*/ - size_t nbits = 64 * 10; - size_t ngroups = FB_NGROUPS(nbits); + size_t nbits = 64 * 10; + size_t ngroups = FB_NGROUPS(nbits); fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); fb_init(fb, nbits); for (size_t i = 0; i < nbits; i++) { @@ -255,7 +252,7 @@ TEST_END static void do_test_empty_full_exhaustive(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *empty = malloc(sz); fb_init(empty, nbits); fb_group_t *full = malloc(sz); @@ -273,15 +270,15 @@ do_test_empty_full_exhaustive(size_t nbits) { expect_false(fb_empty(empty, nbits), "error at bit %zu", i); if (nbits != 1) { - expect_false(fb_full(empty, nbits), - "error at bit %zu", i); - expect_false(fb_empty(full, nbits), - "error at bit %zu", i); + expect_false( + fb_full(empty, nbits), "error at bit %zu", i); + expect_false( + fb_empty(full, nbits), "error at bit %zu", i); } else { - expect_true(fb_full(empty, nbits), - "error at bit %zu", i); - expect_true(fb_empty(full, nbits), - "error at bit %zu", i); + expect_true( + fb_full(empty, nbits), "error at bit %zu", i); + expect_true( + fb_empty(full, nbits), "error at bit %zu", i); } expect_false(fb_full(full, nbits), "error at bit %zu", i); @@ -294,8 +291,7 @@ do_test_empty_full_exhaustive(size_t nbits) { } TEST_BEGIN(test_empty_full) { -#define NB(nbits) \ - do_test_empty_full_exhaustive(nbits); +#define NB(nbits) do_test_empty_full_exhaustive(nbits); NBITS_TAB #undef NB } @@ -306,8 +302,8 @@ TEST_END * built closely on top of it. */ TEST_BEGIN(test_iter_range_simple) { - size_t set_limit = 30; - size_t nbits = 100; + size_t set_limit = 30; + size_t nbits = 100; fb_group_t fb[FB_NGROUPS(100)]; fb_init(fb, nbits); @@ -318,7 +314,7 @@ TEST_BEGIN(test_iter_range_simple) { */ size_t begin = (size_t)-1; size_t len = (size_t)-1; - bool result; + bool result; /* A set of checks with only the first set_limit bits *set*. 
*/ fb_set_range(fb, nbits, 0, set_limit); @@ -410,7 +406,6 @@ TEST_BEGIN(test_iter_range_simple) { expect_zu_eq(0, begin, "Incorrect begin at %zu", i); expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); } - } TEST_END @@ -426,11 +421,11 @@ fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1); ssize_t range_begin = (ssize_t)start; for (; range_begin != (ssize_t)nbits && range_begin != -1; - range_begin += stride) { + range_begin += stride) { if (fb_get(fb, nbits, range_begin) == val) { ssize_t range_end = range_begin; for (; range_end != (ssize_t)nbits && range_end != -1; - range_end += stride) { + range_end += stride) { if (fb_get(fb, nbits, range_end) != val) { break; } @@ -470,26 +465,26 @@ fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { } static void -expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, - bool val, bool forward) { - bool iter_res; +expect_iter_results_at( + fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { + bool iter_res; size_t iter_begin JEMALLOC_CC_SILENCE_INIT(0); - size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); + size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); if (val) { if (forward) { - iter_res = fb_srange_iter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_srange_iter( + fb, nbits, pos, &iter_begin, &iter_len); } else { - iter_res = fb_srange_riter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_srange_riter( + fb, nbits, pos, &iter_begin, &iter_len); } } else { if (forward) { - iter_res = fb_urange_iter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_urange_iter( + fb, nbits, pos, &iter_begin, &iter_len); } else { - iter_res = fb_urange_riter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_urange_riter( + fb, nbits, pos, &iter_begin, &iter_len); } } @@ -500,15 +495,15 @@ expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, */ size_t simple_iter_begin = 0; 
size_t simple_iter_len = 0; - simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, - &simple_iter_len, val, forward); + simple_iter_res = fb_iter_simple( + fb, nbits, pos, &simple_iter_begin, &simple_iter_len, val, forward); expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); if (iter_res && simple_iter_res) { assert_zu_eq(iter_begin, simple_iter_begin, "Begin mismatch at %zu", pos); - expect_zu_eq(iter_len, simple_iter_len, - "Length mismatch at %zu", pos); + expect_zu_eq( + iter_len, simple_iter_len, "Length mismatch at %zu", pos); } } @@ -543,7 +538,7 @@ do_test_iter_range_exhaustive(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -558,7 +553,7 @@ do_test_iter_range_exhaustive(size_t nbits) { expect_iter_results(fb, nbits); fb_unset_range(fb, nbits, 0, nbits); - fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1: nbits / 2); + fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); expect_iter_results(fb, nbits); free(fb); @@ -569,8 +564,7 @@ do_test_iter_range_exhaustive(size_t nbits) { * computation. */ TEST_BEGIN(test_iter_range_exhaustive) { -#define NB(nbits) \ - do_test_iter_range_exhaustive(nbits); +#define NB(nbits) do_test_iter_range_exhaustive(nbits); NBITS_TAB #undef NB } @@ -581,8 +575,8 @@ TEST_END * returns the number of set bits in [scount_start, scount_end). */ static size_t -scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, - size_t scount_end) { +scount_contiguous( + size_t set_start, size_t set_end, size_t scount_start, size_t scount_end) { /* No overlap. 
*/ if (set_end <= scount_start || scount_end <= set_start) { return 0; @@ -611,8 +605,8 @@ scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, } static size_t -ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, - size_t ucount_end) { +ucount_contiguous( + size_t set_start, size_t set_end, size_t ucount_start, size_t ucount_end) { /* No overlap. */ if (set_end <= ucount_start || ucount_end <= set_start) { return ucount_end - ucount_start; @@ -641,34 +635,33 @@ ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, } static void -expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, - size_t set_end) { +expect_count_match_contiguous( + fb_group_t *fb, size_t nbits, size_t set_start, size_t set_end) { for (size_t i = 0; i < nbits; i++) { for (size_t j = i + 1; j <= nbits; j++) { size_t cnt = j - i; - size_t scount_expected = scount_contiguous(set_start, - set_end, i, j); + size_t scount_expected = scount_contiguous( + set_start, set_end, i, j); size_t scount_computed = fb_scount(fb, nbits, i, cnt); expect_zu_eq(scount_expected, scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with bits set in [%zu, %zu)", nbits, i, cnt, set_start, set_end); - size_t ucount_expected = ucount_contiguous(set_start, - set_end, i, j); + size_t ucount_expected = ucount_contiguous( + set_start, set_end, i, j); size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); assert_zu_eq(ucount_expected, ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with bits set in [%zu, %zu)", nbits, i, cnt, set_start, set_end); - } } } static void do_test_count_contiguous(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -688,7 +681,7 @@ do_test_count_contiguous(size_t nbits) { } TEST_BEGIN(test_count_contiguous_simple) { - enum {nbits = 300}; + enum { nbits 
= 300 }; fb_group_t fb[FB_NGROUPS(nbits)]; fb_init(fb, nbits); /* Just an arbitrary number. */ @@ -718,10 +711,10 @@ TEST_BEGIN(test_count_contiguous_simple) { TEST_END TEST_BEGIN(test_count_contiguous) { -#define NB(nbits) \ - /* This test is *particularly* slow in debug builds. */ \ - if ((!config_debug && nbits < 300) || nbits < 150) { \ - do_test_count_contiguous(nbits); \ +#define NB(nbits) \ + /* This test is *particularly* slow in debug builds. */ \ + if ((!config_debug && nbits < 300) || nbits < 150) { \ + do_test_count_contiguous(nbits); \ } NBITS_TAB #undef NB @@ -729,15 +722,15 @@ TEST_BEGIN(test_count_contiguous) { TEST_END static void -expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, - size_t nbits) { +expect_count_match_alternating( + fb_group_t *fb_even, fb_group_t *fb_odd, size_t nbits) { for (size_t i = 0; i < nbits; i++) { for (size_t j = i + 1; j <= nbits; j++) { size_t cnt = j - i; size_t odd_scount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t odd_scount_computed = fb_scount(fb_odd, nbits, - i, j - i); + size_t odd_scount_computed = fb_scount( + fb_odd, nbits, i, j - i); assert_zu_eq(odd_scount, odd_scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -745,8 +738,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t odd_ucount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, - i, j - i); + size_t odd_ucount_computed = fb_ucount( + fb_odd, nbits, i, j - i); assert_zu_eq(odd_ucount, odd_ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -754,8 +747,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t even_scount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t even_scount_computed = fb_scount(fb_even, nbits, - i, j - i); + size_t even_scount_computed = fb_scount( + fb_even, nbits, 
i, j - i); assert_zu_eq(even_scount, even_scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -763,8 +756,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t even_ucount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t even_ucount_computed = fb_ucount(fb_even, nbits, - i, j - i); + size_t even_ucount_computed = fb_ucount( + fb_even, nbits, i, j - i); assert_zu_eq(even_ucount, even_ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -778,7 +771,7 @@ do_test_count_alternating(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb_even = malloc(sz); fb_group_t *fb_odd = malloc(sz); @@ -800,8 +793,7 @@ do_test_count_alternating(size_t nbits) { } TEST_BEGIN(test_count_alternating) { -#define NB(nbits) \ - do_test_count_alternating(nbits); +#define NB(nbits) do_test_count_alternating(nbits); NBITS_TAB #undef NB } @@ -809,8 +801,9 @@ TEST_END static void do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), - void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + void (*fb_op)( + fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb1 = malloc(sz); fb_group_t *fb2 = malloc(sz); fb_group_t *fb_result = malloc(sz); @@ -853,8 +846,10 @@ do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); /* Original bitmaps shouldn't change. 
*/ - expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); - expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); + expect_b_eq( + bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); + expect_b_eq( + bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); /* New one should be bitwise and. */ expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), @@ -883,8 +878,7 @@ do_test_bit_and(size_t nbits) { } TEST_BEGIN(test_bit_and) { -#define NB(nbits) \ - do_test_bit_and(nbits); +#define NB(nbits) do_test_bit_and(nbits); NBITS_TAB #undef NB } @@ -901,8 +895,7 @@ do_test_bit_or(size_t nbits) { } TEST_BEGIN(test_bit_or) { -#define NB(nbits) \ - do_test_bit_or(nbits); +#define NB(nbits) do_test_bit_or(nbits); NBITS_TAB #undef NB } @@ -915,8 +908,8 @@ binary_not(bool a, bool b) { } static void -fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, - size_t nbits) { +fb_bit_not_shim( + fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { (void)src2; fb_bit_not(dst, src1, nbits); } @@ -927,8 +920,7 @@ do_test_bit_not(size_t nbits) { } TEST_BEGIN(test_bit_not) { -#define NB(nbits) \ - do_test_bit_not(nbits); +#define NB(nbits) do_test_bit_not(nbits); NBITS_TAB #undef NB } @@ -936,19 +928,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_fb_init, - test_get_set_unset, - test_search_simple, - test_search_exhaustive, - test_range_simple, - test_empty_full, - test_iter_range_simple, - test_iter_range_exhaustive, - test_count_contiguous_simple, - test_count_contiguous, - test_count_alternating, - test_bit_and, - test_bit_or, - test_bit_not); + return test_no_reentrancy(test_fb_init, test_get_set_unset, + test_search_simple, test_search_exhaustive, test_range_simple, + test_empty_full, test_iter_range_simple, test_iter_range_exhaustive, + test_count_contiguous_simple, test_count_contiguous, + test_count_alternating, test_bit_and, test_bit_or, test_bit_not); } diff --git a/test/unit/fork.c 
b/test/unit/fork.c index 1a4c575e..e52d0a6c 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -8,7 +8,7 @@ TEST_BEGIN(test_fork) { /* Set up a manually managed arena for test. */ unsigned arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -16,8 +16,8 @@ TEST_BEGIN(test_fork) { unsigned old_arena_ind; sz = sizeof(old_arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); p = malloc(1); expect_ptr_not_null(p, "Unexpected malloc() failure"); @@ -108,7 +108,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_fork, - test_fork_multithreaded); + return test_no_reentrancy(test_fork, test_fork_multithreaded); } diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 27f10976..02020efe 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -28,7 +28,7 @@ fxp_close(fxp_t a, fxp_t b) { static fxp_t xparse_fxp(const char *str) { fxp_t result; - bool err = fxp_parse(&result, str, NULL); + bool err = fxp_parse(&result, str, NULL); assert_false(err, "Invalid fxp string: %s", str); return result; } @@ -36,14 +36,14 @@ xparse_fxp(const char *str) { static void expect_parse_accurate(const char *str, const char *parse_str) { double true_val = strtod(str, NULL); - fxp_t fxp_val; - char *end; - bool err = fxp_parse(&fxp_val, parse_str, &end); + fxp_t fxp_val; + char *end; + bool err = fxp_parse(&fxp_val, parse_str, &end); expect_false(err, "Unexpected parse failure"); - expect_ptr_eq(parse_str + strlen(str), end, - "Didn't parse whole string"); - expect_true(double_close(fxp2double(fxp_val), true_val), - "Misparsed %s", str); + expect_ptr_eq( + parse_str + strlen(str), end, "Didn't parse whole string"); + expect_true( + double_close(fxp2double(fxp_val), 
true_val), "Misparsed %s", str); } static void @@ -100,12 +100,12 @@ static void expect_parse_failure(const char *str) { fxp_t result = FXP_INIT_INT(333); char *end = (void *)0x123; - bool err = fxp_parse(&result, str, &end); + bool err = fxp_parse(&result, str, &end); expect_true(err, "Expected a parse error on: %s", str); - expect_ptr_eq((void *)0x123, end, - "Parse error shouldn't change results"); - expect_u32_eq(result, FXP_INIT_INT(333), - "Parse error shouldn't change results"); + expect_ptr_eq( + (void *)0x123, end, "Parse error shouldn't change results"); + expect_u32_eq( + result, FXP_INIT_INT(333), "Parse error shouldn't change results"); } TEST_BEGIN(test_parse_invalid) { @@ -129,7 +129,6 @@ expect_init_percent(unsigned percent, const char *str) { "Expect representations of FXP_INIT_PERCENT(%u) and " "fxp_parse(\"%s\") to be equal; got %x and %x", percent, str, result_init, result_parse); - } /* @@ -145,12 +144,12 @@ TEST_BEGIN(test_init_percent) { TEST_END static void -expect_add(const char *astr, const char *bstr, const char* resultstr) { +expect_add(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_add(a, b), result), - "Expected %s + %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_add(a, b), result), "Expected %s + %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_add_simple) { @@ -164,12 +163,12 @@ TEST_BEGIN(test_add_simple) { TEST_END static void -expect_sub(const char *astr, const char *bstr, const char* resultstr) { +expect_sub(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_sub(a, b), result), - "Expected %s - %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_sub(a, b), result), "Expected %s - %s == %s", + astr, bstr, resultstr); } 
TEST_BEGIN(test_sub_simple) { @@ -183,12 +182,12 @@ TEST_BEGIN(test_sub_simple) { TEST_END static void -expect_mul(const char *astr, const char *bstr, const char* resultstr) { +expect_mul(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_mul(a, b), result), - "Expected %s * %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_mul(a, b), result), "Expected %s * %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_mul_simple) { @@ -202,12 +201,12 @@ TEST_BEGIN(test_mul_simple) { TEST_END static void -expect_div(const char *astr, const char *bstr, const char* resultstr) { +expect_div(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_div(a, b), result), - "Expected %s / %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_div(a, b), result), "Expected %s / %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_div_simple) { @@ -223,11 +222,11 @@ TEST_END static void expect_round(const char *str, uint32_t rounded_down, uint32_t rounded_nearest) { - fxp_t fxp = xparse_fxp(str); + fxp_t fxp = xparse_fxp(str); uint32_t fxp_rounded_down = fxp_round_down(fxp); uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp); - expect_u32_eq(rounded_down, fxp_rounded_down, - "Mistake rounding %s down", str); + expect_u32_eq( + rounded_down, fxp_rounded_down, "Mistake rounding %s down", str); expect_u32_eq(rounded_nearest, fxp_rounded_nearest, "Mistake rounding %s to nearest", str); } @@ -248,11 +247,11 @@ TEST_END static void expect_mul_frac(size_t a, const char *fracstr, size_t expected) { - fxp_t frac = xparse_fxp(fracstr); + fxp_t frac = xparse_fxp(fracstr); size_t result = fxp_mul_frac(a, frac); expect_true(double_close(expected, result), - "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, - 
expected, result); + "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, expected, + result); } TEST_BEGIN(test_mul_frac_simple) { @@ -273,7 +272,7 @@ TEST_END static void expect_print(const char *str) { fxp_t fxp = xparse_fxp(str); - char buf[FXP_BUF_SIZE]; + char buf[FXP_BUF_SIZE]; fxp_print(fxp, buf); expect_d_eq(0, strcmp(str, buf), "Couldn't round-trip print %s", str); } @@ -298,33 +297,32 @@ TEST_BEGIN(test_print_simple) { TEST_END TEST_BEGIN(test_stress) { - const char *numbers[] = { - "0.0", "0.1", "0.2", "0.3", "0.4", - "0.5", "0.6", "0.7", "0.8", "0.9", + const char *numbers[] = {"0.0", "0.1", "0.2", "0.3", "0.4", "0.5", + "0.6", "0.7", "0.8", "0.9", - "1.0", "1.1", "1.2", "1.3", "1.4", - "1.5", "1.6", "1.7", "1.8", "1.9", + "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", + "1.9", - "2.0", "2.1", "2.2", "2.3", "2.4", - "2.5", "2.6", "2.7", "2.8", "2.9", + "2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", + "2.9", - "17.0", "17.1", "17.2", "17.3", "17.4", - "17.5", "17.6", "17.7", "17.8", "17.9", + "17.0", "17.1", "17.2", "17.3", "17.4", "17.5", "17.6", "17.7", + "17.8", "17.9", - "18.0", "18.1", "18.2", "18.3", "18.4", - "18.5", "18.6", "18.7", "18.8", "18.9", + "18.0", "18.1", "18.2", "18.3", "18.4", "18.5", "18.6", "18.7", + "18.8", "18.9", - "123.0", "123.1", "123.2", "123.3", "123.4", - "123.5", "123.6", "123.7", "123.8", "123.9", + "123.0", "123.1", "123.2", "123.3", "123.4", "123.5", "123.6", + "123.7", "123.8", "123.9", - "124.0", "124.1", "124.2", "124.3", "124.4", - "124.5", "124.6", "124.7", "124.8", "124.9", + "124.0", "124.1", "124.2", "124.3", "124.4", "124.5", "124.6", + "124.7", "124.8", "124.9", - "125.0", "125.1", "125.2", "125.3", "125.4", - "125.5", "125.6", "125.7", "125.8", "125.9"}; - size_t numbers_len = sizeof(numbers)/sizeof(numbers[0]); + "125.0", "125.1", "125.2", "125.3", "125.4", "125.5", "125.6", + "125.7", "125.8", "125.9"}; + size_t numbers_len = sizeof(numbers) / sizeof(numbers[0]); for 
(size_t i = 0; i < numbers_len; i++) { - fxp_t fxp_a = xparse_fxp(numbers[i]); + fxp_t fxp_a = xparse_fxp(numbers[i]); double double_a = strtod(numbers[i], NULL); uint32_t fxp_rounded_down = fxp_round_down(fxp_a); @@ -338,37 +336,35 @@ TEST_BEGIN(test_stress) { "Incorrectly rounded-to-nearest %s", numbers[i]); for (size_t j = 0; j < numbers_len; j++) { - fxp_t fxp_b = xparse_fxp(numbers[j]); + fxp_t fxp_b = xparse_fxp(numbers[j]); double double_b = strtod(numbers[j], NULL); - fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); + fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); double double_sum = double_a + double_b; expect_true( double_close(fxp2double(fxp_sum), double_sum), "Miscomputed %s + %s", numbers[i], numbers[j]); if (double_a > double_b) { - fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); + fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); double double_diff = double_a - double_b; - expect_true( - double_close(fxp2double(fxp_diff), - double_diff), + expect_true(double_close(fxp2double(fxp_diff), + double_diff), "Miscomputed %s - %s", numbers[i], numbers[j]); } - fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); + fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); double double_prod = double_a * double_b; expect_true( double_close(fxp2double(fxp_prod), double_prod), "Miscomputed %s * %s", numbers[i], numbers[j]); if (double_b != 0.0) { - fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); + fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); double double_quot = double_a / double_b; - expect_true( - double_close(fxp2double(fxp_quot), - double_quot), + expect_true(double_close(fxp2double(fxp_quot), + double_quot), "Miscomputed %s / %s", numbers[i], numbers[j]); } @@ -379,16 +375,8 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_parse_valid, - test_parse_invalid, - test_init_percent, - test_add_simple, - test_sub_simple, - test_mul_simple, - test_div_simple, - test_round_simple, - test_mul_frac_simple, - test_print_simple, - test_stress); + return test_no_reentrancy(test_parse_valid, test_parse_invalid, + test_init_percent, 
test_add_simple, test_sub_simple, + test_mul_simple, test_div_simple, test_round_simple, + test_mul_frac_simple, test_print_simple, test_stress); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 7276333d..e39110fc 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -39,24 +39,32 @@ typedef enum { static int hash_variant_bits(hash_variant_t variant) { switch (variant) { - case hash_variant_x86_32: return 32; - case hash_variant_x86_128: return 128; - case hash_variant_x64_128: return 128; - default: not_reached(); + case hash_variant_x86_32: + return 32; + case hash_variant_x86_128: + return 128; + case hash_variant_x64_128: + return 128; + default: + not_reached(); } } static const char * hash_variant_string(hash_variant_t variant) { switch (variant) { - case hash_variant_x86_32: return "hash_x86_32"; - case hash_variant_x86_128: return "hash_x86_128"; - case hash_variant_x64_128: return "hash_x64_128"; - default: not_reached(); + case hash_variant_x86_32: + return "hash_x86_32"; + case hash_variant_x86_128: + return "hash_x86_128"; + case hash_variant_x64_128: + return "hash_x64_128"; + default: + not_reached(); } } -#define KEY_SIZE 256 +#define KEY_SIZE 256 static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; @@ -79,20 +87,24 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { switch (variant) { case hash_variant_x86_32: { uint32_t out; - out = hash_x86_32(key, i, 256-i); - memcpy(&hashes[i*hashbytes], &out, hashbytes); + out = hash_x86_32(key, i, 256 - i); + memcpy(&hashes[i * hashbytes], &out, hashbytes); break; - } case hash_variant_x86_128: { + } + case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(key, i, 256-i, out); - memcpy(&hashes[i*hashbytes], out, hashbytes); + hash_x86_128(key, i, 256 - i, out); + memcpy(&hashes[i * hashbytes], out, hashbytes); break; - } case hash_variant_x64_128: { + } + case hash_variant_x64_128: { uint64_t out[2]; - 
hash_x64_128(key, i, 256-i, out); - memcpy(&hashes[i*hashbytes], out, hashbytes); + hash_x64_128(key, i, 256 - i, out); + memcpy(&hashes[i * hashbytes], out, hashbytes); break; - } default: not_reached(); + } + default: + not_reached(); } } @@ -102,36 +114,50 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; - } case hash_variant_x86_128: { + } + case hash_variant_x86_128: { uint64_t out[2]; hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; - } case hash_variant_x64_128: { + } + case hash_variant_x64_128: { uint64_t out[2]; hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; - } default: not_reached(); + } + default: + not_reached(); } - computed = - ((uint32_t)final[0] << 0) | - ((uint32_t)final[1] << 8) | - ((uint32_t)final[2] << 16) | - ((uint32_t)final[3] << 24); + computed = ((uint32_t) final[0] << 0) | ((uint32_t) final[1] << 8) + | ((uint32_t) final[2] << 16) | ((uint32_t) final[3] << 24); switch (variant) { #ifdef JEMALLOC_BIG_ENDIAN - case hash_variant_x86_32: expected = 0x6213303eU; break; - case hash_variant_x86_128: expected = 0x266820caU; break; - case hash_variant_x64_128: expected = 0xcc622b6fU; break; + case hash_variant_x86_32: + expected = 0x6213303eU; + break; + case hash_variant_x86_128: + expected = 0x266820caU; + break; + case hash_variant_x64_128: + expected = 0xcc622b6fU; + break; #else - case hash_variant_x86_32: expected = 0xb0f57ee3U; break; - case hash_variant_x86_128: expected = 0xb3ece62aU; break; - case hash_variant_x64_128: expected = 0x6384ba69U; break; + case hash_variant_x86_32: + expected = 0xb0f57ee3U; + break; + case hash_variant_x86_128: + expected = 0xb3ece62aU; + break; + case hash_variant_x64_128: + expected = 0x6384ba69U; + break; #endif - default: not_reached(); + default: + not_reached(); } expect_u32_eq(computed, expected, @@ -141,8 +167,8 @@ 
hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { static void hash_variant_verify(hash_variant_t variant) { -#define MAX_ALIGN 16 - uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; unsigned i; for (i = 0; i < MAX_ALIGN; i++) { @@ -169,8 +195,5 @@ TEST_END int main(void) { - return test( - test_hash_x86_32, - test_hash_x86_128, - test_hash_x64_128); + return test(test_hash_x86_32, test_hash_x86_128, test_hash_x64_128); } diff --git a/test/unit/hook.c b/test/unit/hook.c index f2a7f190..3a6b3c13 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -2,12 +2,12 @@ #include "jemalloc/internal/hook.h" -static void *arg_extra; -static int arg_type; -static void *arg_result; -static void *arg_address; -static size_t arg_old_usize; -static size_t arg_new_usize; +static void *arg_extra; +static int arg_type; +static void *arg_result; +static void *arg_address; +static size_t arg_old_usize; +static size_t arg_new_usize; static uintptr_t arg_result_raw; static uintptr_t arg_args_raw[4]; @@ -71,8 +71,8 @@ set_args_raw(uintptr_t *args_raw, int nargs) { static void expect_args_raw(uintptr_t *args_raw_expected, int nargs) { - int cmp = memcmp(args_raw_expected, arg_args_raw, - sizeof(uintptr_t) * nargs); + int cmp = memcmp( + args_raw_expected, arg_args_raw, sizeof(uintptr_t) * nargs); expect_d_eq(cmp, 0, "Raw args mismatch"); } @@ -95,8 +95,8 @@ test_alloc_hook(void *extra, hook_alloc_t type, void *result, } static void -test_dalloc_hook(void *extra, hook_dalloc_t type, void *address, - uintptr_t args_raw[3]) { +test_dalloc_hook( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { call_count++; arg_extra = extra; arg_type = (int)type; @@ -122,16 +122,15 @@ test_expand_hook(void *extra, hook_expand_t type, void *address, TEST_BEGIN(test_hooks_basic) { /* Just verify that the record their arguments correctly. 
*/ - hooks_t hooks = { - &test_alloc_hook, &test_dalloc_hook, &test_expand_hook, - (void *)111}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)111}; + void *handle = hook_install(TSDN_NULL, &hooks); uintptr_t args_raw[4] = {10, 20, 30, 40}; /* Alloc */ reset_args(); - hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333, - args_raw); + hook_invoke_alloc( + hook_alloc_posix_memalign, (void *)222, 333, args_raw); expect_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); expect_d_eq((int)hook_alloc_posix_memalign, arg_type, "Passed wrong alloc type"); @@ -142,18 +141,18 @@ TEST_BEGIN(test_hooks_basic) { /* Dalloc */ reset_args(); hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw); - expect_d_eq((int)hook_dalloc_sdallocx, arg_type, - "Passed wrong dalloc type"); + expect_d_eq( + (int)hook_dalloc_sdallocx, arg_type, "Passed wrong dalloc type"); expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); expect_args_raw(args_raw, 3); /* Expand */ reset_args(); - hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555, - args_raw); - expect_d_eq((int)hook_expand_xallocx, arg_type, - "Passed wrong expand type"); + hook_invoke_expand( + hook_expand_xallocx, (void *)222, 333, 444, 555, args_raw); + expect_d_eq( + (int)hook_expand_xallocx, arg_type, "Passed wrong expand type"); expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); expect_zu_eq(333, arg_old_usize, "Passed wrong old usize"); @@ -205,7 +204,7 @@ TEST_END TEST_BEGIN(test_hooks_remove) { hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); call_count = 0; uintptr_t args_raw[4] = 
{10, 20, 30, 40}; @@ -216,14 +215,13 @@ TEST_BEGIN(test_hooks_remove) { hook_remove(TSDN_NULL, handle); hook_invoke_alloc(hook_alloc_malloc, NULL, 0, NULL); expect_d_eq(call_count, 0, "Hook invoked after removal"); - } TEST_END TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); /* Stop malloc from being optimized away. */ @@ -237,8 +235,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); @@ -247,11 +245,11 @@ TEST_BEGIN(test_hooks_alloc_simple) { err = posix_memalign((void **)&ptr, 1024, 1); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_alloc_posix_memalign, - "Wrong hook type"); + expect_d_eq( + arg_type, (int)hook_alloc_posix_memalign, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)err, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); @@ -262,11 +260,10 @@ TEST_BEGIN(test_hooks_alloc_simple) { ptr = aligned_alloc(1024, 1); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, 
(void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, - "Wrong hook type"); + expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -278,8 +275,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -292,8 +289,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -307,8 +304,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); 
expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ @@ -321,8 +318,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_pvalloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_PVALLOC */ @@ -334,11 +331,11 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], - "Wrong flags"); + expect_u64_eq( + (uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], "Wrong flags"); free(ptr); hook_remove(TSDN_NULL, handle); @@ -348,7 +345,7 @@ TEST_END TEST_BEGIN(test_hooks_dalloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -372,8 +369,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) { expect_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], - "Wrong raw arg"); + expect_u64_eq( + (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], "Wrong raw arg"); /* sdallocx() */ reset(); @@ -385,8 +382,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) { expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); - expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], - "Wrong raw arg"); + expect_u64_eq( + (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], "Wrong raw arg"); hook_remove(TSDN_NULL, handle); } @@ -395,7 +392,7 @@ TEST_END TEST_BEGIN(test_hooks_expand_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -421,9 +418,9 @@ TEST_BEGIN(test_hooks_expand_simple) { TEST_END TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { - hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -435,8 +432,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -448,14 +445,11 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { realloc(ptr, 0); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_dalloc_realloc, - "Wrong hook type"); - expect_ptr_eq(ptr, arg_address, - "Wrong pointer freed"); - expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], - "Wrong raw arg"); - expect_u64_eq((uintptr_t)0, arg_args_raw[1], - "Wrong raw arg"); + expect_d_eq( + arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); } /* 
realloc(NULL, 0) as malloc(0) */ @@ -465,8 +459,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -478,9 +472,9 @@ TEST_END static void do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, int expand_type, int dalloc_type) { - hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -496,8 +490,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, expand_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -522,11 +516,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, } expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_ptr_eq(ptr2, arg_address, "Wrong address"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); 
expect_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], - "Wrong argument"); + expect_u64_eq( + (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr); /* Realloc with move, small. */ @@ -540,8 +534,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); expect_ptr_eq(ptr2, arg_result, "Wrong address"); - expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); free(ptr2); @@ -557,11 +551,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); expect_ptr_eq(ptr2, arg_result, "Wrong address"); - expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], - "Wrong argument"); + expect_u64_eq( + (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr2); hook_remove(TSDN_NULL, handle); @@ -573,8 +567,8 @@ realloc_wrapper(void *ptr, size_t size, UNUSED int flags) { } TEST_BEGIN(test_hooks_realloc) { - do_realloc_test(&realloc_wrapper, 0, hook_expand_realloc, - hook_dalloc_realloc); + do_realloc_test( + &realloc_wrapper, 0, hook_expand_realloc, hook_dalloc_realloc); } TEST_END @@ -587,14 +581,9 @@ TEST_END int main(void) { /* We assert on call counts. 
*/ - return test_no_reentrancy( - test_hooks_basic, - test_hooks_null, - test_hooks_remove, - test_hooks_alloc_simple, - test_hooks_dalloc_simple, - test_hooks_expand_simple, - test_hooks_realloc_as_malloc_or_free, - test_hooks_realloc, + return test_no_reentrancy(test_hooks_basic, test_hooks_null, + test_hooks_remove, test_hooks_alloc_simple, + test_hooks_dalloc_simple, test_hooks_expand_simple, + test_hooks_realloc_as_malloc_or_free, test_hooks_realloc, test_hooks_rallocx); } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 47fa25f2..1fed8a80 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -13,55 +13,53 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_opts_t test_hpa_shard_opts_purge = { - /* slab_max_alloc */ - HUGEPAGE, - /* hugification_threshold */ - 0.9 * HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(11), - /* deferral_allowed */ - true, - /* hugify_delay_ms */ - 0, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 
* HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -98,8 +96,8 @@ destroy_test_data(hpa_shard_t *shard) { TEST_BEGIN(test_alloc_max) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edata; @@ -107,19 +105,19 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - /* frequent_reuse */ false, &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, - false, /* frequent_reuse */ true, &deferred_work_generated); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, + /* frequent_reuse */ true, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of frequent reused failed"); - edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, - false, /* frequent_reuse */ true, &deferred_work_generated); + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, false, + /* frequent_reuse */ true, &deferred_work_generated); 
expect_ptr_not_null(edata, "Allocation of frequent reused failed"); edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false, @@ -133,8 +131,8 @@ TEST_END typedef struct mem_contents_s mem_contents_t; struct mem_contents_s { uintptr_t my_addr; - size_t size; - edata_t *my_edata; + size_t size; + edata_t *my_edata; rb_node(mem_contents_t) link; }; @@ -144,8 +142,7 @@ mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) { } typedef rb_tree(mem_contents_t) mem_tree_t; -rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, - mem_contents_cmp); +rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, mem_contents_cmp); static void node_assert_ordered(mem_contents_t *a, mem_contents_t *b) { @@ -191,14 +188,14 @@ node_remove(mem_tree_t *tree, edata_t *edata) { TEST_BEGIN(test_stress) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); const size_t nlive_edatas_max = 500; - size_t nlive_edatas = 0; - edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); /* * Nothing special about this constant; we're only fixing it for * consistency across runs. 
@@ -224,13 +221,14 @@ TEST_BEGIN(test_stress) { */ size_t npages_min = 1; size_t npages_max = ALLOC_MAX / PAGE; - size_t npages = npages_min + prng_range_zu(&prng_state, - npages_max - npages_min); + size_t npages = npages_min + + prng_range_zu( + &prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, npages * PAGE, PAGE, false, false, false, &deferred_work_generated); - assert_ptr_not_null(edata, - "Unexpected allocation failure"); + assert_ptr_not_null( + edata, "Unexpected allocation failure"); live_edatas[nlive_edatas] = edata; nlive_edatas++; node_insert(&tree, edata, npages); @@ -239,7 +237,8 @@ TEST_BEGIN(test_stress) { if (nlive_edatas == 0) { continue; } - size_t victim = prng_range_zu(&prng_state, nlive_edatas); + size_t victim = prng_range_zu( + &prng_state, nlive_edatas); edata_t *to_free = live_edatas[victim]; live_edatas[victim] = live_edatas[nlive_edatas - 1]; nlive_edatas--; @@ -251,7 +250,7 @@ TEST_BEGIN(test_stress) { size_t ntreenodes = 0; for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; - contents = mem_tree_next(&tree, contents)) { + contents = mem_tree_next(&tree, contents)) { ntreenodes++; node_check(&tree, contents); } @@ -264,8 +263,8 @@ TEST_BEGIN(test_stress) { for (size_t i = 0; i < nlive_edatas; i++) { edata_t *to_free = live_edatas[i]; node_remove(&tree, to_free); - pai_dalloc(tsdn, &shard->pai, to_free, - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, to_free, &deferred_work_generated); } hpa_shard_destroy(tsdn, shard); @@ -277,8 +276,7 @@ TEST_END static void expect_contiguous(edata_t **edatas, size_t nedatas) { for (size_t i = 0; i < nedatas; i++) { - size_t expected = (size_t)edata_base_get(edatas[0]) - + i * PAGE; + size_t expected = (size_t)edata_base_get(edatas[0]) + i * PAGE; expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), "Mismatch at index %zu", i); } @@ -287,13 +285,13 @@ expect_contiguous(edata_t **edatas, size_t nedatas) { 
TEST_BEGIN(test_alloc_dalloc_batch) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); bool deferred_work_generated = false; - enum {NALLOCS = 8}; + enum { NALLOCS = 8 }; edata_t *allocs[NALLOCS]; /* @@ -329,11 +327,11 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { edata_list_active_append(&allocs_list, allocs[i]); } - pai_dalloc_batch(tsdn, &shard->pai, &allocs_list, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, &shard->pai, &allocs_list, &deferred_work_generated); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc(tsdn, &shard->pai, allocs[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, allocs[i], &deferred_work_generated); } /* Reallocate (individually), and ensure reuse and contiguity. */ @@ -344,8 +342,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); - expect_ptr_eq(orig_base, new_base, - "Failed to reuse the allocated memory."); + expect_ptr_eq( + orig_base, new_base, "Failed to reuse the allocated memory."); expect_contiguous(allocs, NALLOCS); destroy_test_data(shard); @@ -429,7 +427,7 @@ TEST_BEGIN(test_defer_time) { bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -448,8 +446,8 @@ TEST_BEGIN(test_defer_time) { /* Purge. Recall that dirty_mult is .25. 
*/ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -474,8 +472,7 @@ TEST_BEGIN(test_defer_time) { * We would be ineligible for hugification, had we not already met the * threshold before dipping below it. */ - pai_dalloc(tsdn, &shard->pai, edatas[0], - &deferred_work_generated); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); /* Wait for the threshold again. */ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -491,8 +488,8 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_purge); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_purge); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -500,14 +497,15 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. 
*/ - const size_t npages = - test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t npages = test_hpa_shard_opts_purge.hugification_threshold + / PAGE + + 1; const size_t size = npages * PAGE; - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &shard->pai, size, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); hpa_shard_do_deferred_work(tsdn, shard); @@ -542,8 +540,8 @@ TEST_BEGIN(test_no_min_purge_interval) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -584,8 +582,8 @@ TEST_BEGIN(test_min_purge_interval) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -634,7 +632,7 @@ TEST_BEGIN(test_purge) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -643,8 +641,8 @@ 
TEST_BEGIN(test_purge) { } /* Deallocate 3 hugepages out of 8. */ for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -702,7 +700,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -711,8 +709,8 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } /* Deallocate 3 hugepages out of 8. */ for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -749,8 +747,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { TEST_END TEST_BEGIN(test_vectorized_opt_eq_zero) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch != 0)); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -770,11 +767,11 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { ndefer_purge_calls = 0; hpa_shard_t *shard = create_test_data(&hooks, &opts); - bool deferred_work_generated = false; + bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); 
pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -800,15 +797,9 @@ main(void) { (void)mem_tree_iter; (void)mem_tree_reverse_iter; (void)mem_tree_destroy; - return test_no_reentrancy( - test_alloc_max, - test_stress, - test_alloc_dalloc_batch, - test_defer_time, - test_purge_no_infinite_loop, - test_no_min_purge_interval, - test_min_purge_interval, - test_purge, - test_experimental_max_purge_nhp, - test_vectorized_opt_eq_zero); + return test_no_reentrancy(test_alloc_max, test_stress, + test_alloc_dalloc_batch, test_defer_time, + test_purge_no_infinite_loop, test_no_min_purge_interval, + test_min_purge_interval, test_purge, + test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 93f046b5..80cf2fed 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_hpa_background_thread_a0_initialized) { test_skip_if(!have_background_thread); test_skip_if(san_guard_enabled()); - bool enabled = false; + bool enabled = false; size_t sz = sizeof(enabled); int err = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); expect_d_eq(err, 0, "Unexpected mallctl() failure"); @@ -38,7 +38,7 @@ sleep_for_background_thread_interval(void) { static unsigned create_arena(void) { unsigned arena_ind; - size_t sz; + size_t sz; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2), @@ -48,17 +48,17 @@ create_arena(void) { static size_t get_empty_ndirty(unsigned arena_ind) { - int err; - size_t ndirty_huge; - size_t ndirty_nonhuge; + int err; + size_t ndirty_huge; + size_t ndirty_nonhuge; uint64_t epoch = 1; - size_t sz = sizeof(epoch); - err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch, - sizeof(epoch)); + size_t sz = sizeof(epoch); + err = je_mallctl( + "epoch", (void *)&epoch, &sz, (void *)&epoch, sizeof(epoch)); 
expect_d_eq(0, err, "Unexpected mallctl() failure"); size_t mib[6]; - size_t miblen = sizeof(mib)/sizeof(mib[0]); + size_t miblen = sizeof(mib) / sizeof(mib[0]); err = mallctlnametomib( "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib, &miblen); @@ -70,8 +70,7 @@ get_empty_ndirty(unsigned arena_ind) { expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); err = mallctlnametomib( - "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, - &miblen); + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, &miblen); expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); sz = sizeof(ndirty_huge); @@ -85,20 +84,20 @@ get_empty_ndirty(unsigned arena_ind) { static void set_background_thread_enabled(bool enabled) { int err; - err = je_mallctl("background_thread", NULL, NULL, &enabled, - sizeof(enabled)); + err = je_mallctl( + "background_thread", NULL, NULL, &enabled, sizeof(enabled)); expect_d_eq(0, err, "Unexpected mallctl failure"); } static void wait_until_thread_is_enabled(unsigned arena_id) { - tsd_t* tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); bool sleeping = false; - int iterations = 0; + int iterations = 0; do { - background_thread_info_t *info = - background_thread_info_get(arena_id); + background_thread_info_t *info = background_thread_info_get( + arena_id); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); sleeping = background_thread_indefinite_sleep(info); @@ -113,10 +112,8 @@ expect_purging(unsigned arena_ind) { expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); void *ptrs[2]; - ptrs[0] = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); - ptrs[1] = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[0] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[1] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); empty_ndirty = get_empty_ndirty(arena_ind); expect_zu_eq(0, empty_ndirty, "All pages should be 
active"); @@ -151,15 +148,14 @@ expect_deferred_purging(unsigned arena_ind) { */ bool observed_dirty_page = false; for (int i = 0; i < 10; i++) { - void *ptr = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + void *ptr = mallocx( + PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); empty_ndirty = get_empty_ndirty(arena_ind); expect_zu_eq(0, empty_ndirty, "All pages should be active"); dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); - expect_true(empty_ndirty == 0 || empty_ndirty == 1 || - opt_prof, "Unexpected extra dirty page count: %zu", - empty_ndirty); + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || opt_prof, + "Unexpected extra dirty page count: %zu", empty_ndirty); if (empty_ndirty > 0) { observed_dirty_page = true; break; @@ -173,8 +169,8 @@ expect_deferred_purging(unsigned arena_ind) { * time. Retry 100 times max before bailing out. */ unsigned retry = 0; - while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && - (retry++ < 100)) { + while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 + && (retry++ < 100)) { sleep_for_background_thread_interval(); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 6770a9fa..8df54d06 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -13,36 +13,35 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; 
+ /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -108,7 +107,8 @@ defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { } static bool defer_vec_purge_didfail = false; -static bool defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { +static bool +defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { (void)vec; (void)vlen; (void)nbytes; @@ -141,8 +141,7 @@ defer_test_ms_since(nstime_t *past_time) { } TEST_BEGIN(test_vectorized_failure_fallback) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0)); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0)); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -166,8 +165,8 @@ TEST_BEGIN(test_vectorized_failure_fallback) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -181,9 +180,8 @@ TEST_BEGIN(test_vectorized_failure_fallback) { TEST_END TEST_BEGIN(test_more_regions_purged_from_one_page) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0) || - HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || 
(opt_process_madvise_max_batch == 0) + || HUGEPAGE_PAGES <= 4); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -208,7 +206,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -249,12 +247,10 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { } TEST_END -size_t -hpa_purge_max_batch_size_for_test_set(size_t new_size); +size_t hpa_purge_max_batch_size_for_test_set(size_t new_size); TEST_BEGIN(test_more_pages_than_batch_page_size) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0) || - HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0) + || HUGEPAGE_PAGES <= 4); size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); @@ -281,7 +277,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -289,8 +285,8 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -321,8 +317,7 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_vectorized_failure_fallback, + return test_no_reentrancy(test_vectorized_failure_fallback, test_more_regions_purged_from_one_page, test_more_pages_than_batch_page_size); } diff --git 
a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 561da7a2..a5766620 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -13,36 +13,35 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -132,8 +131,8 @@ defer_test_ms_since(nstime_t *past_time) { } TEST_BEGIN(test_vectorized_purge) { - test_skip_if(!hpa_supported() || - opt_process_madvise_max_batch == 0 || HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || opt_process_madvise_max_batch == 0 + || HUGEPAGE_PAGES <= 4); assert(opt_process_madvise_max_batch == 64); hpa_hooks_t hooks; @@ -159,7 +158,7 @@ TEST_BEGIN(test_vectorized_purge) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { 
NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -192,6 +191,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_vectorized_purge); + return test_no_reentrancy(test_vectorized_purge); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 995ab77b..2329f065 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -69,23 +69,25 @@ TEST_BEGIN(test_purge_simple) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; + size_t nranges; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); expect_zu_eq(1, nranges, "All dirty pages in a single range"); - void *purge_addr; + void *purge_addr; size_t purge_size; - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); @@ -102,7 +104,8 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); /* Allocate the first 3/4 of the pages. 
*/ - void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + void *alloc = hpdata_reserve_alloc( + &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); /* Free the first 1/4 and the third 1/4 of the pages. */ @@ -115,16 +118,16 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; + size_t nranges; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); expect_zu_eq(2, nranges, "First quarter and last half"); - void *purge_addr; + void *purge_addr; size_t purge_size; /* First purge. */ - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); @@ -135,18 +138,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) { HUGEPAGE_PAGES / 4 * PAGE); /* Now continue purging. 
*/ - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq( (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), purge_addr, ""); expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); @@ -155,19 +160,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) { TEST_END TEST_BEGIN(test_purge_over_retained) { - void *purge_addr; + void *purge_addr; size_t purge_size; hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); /* Allocate the first 3/4 of the pages. */ - void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + void *alloc = hpdata_reserve_alloc( + &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); /* Free the second quarter. */ - void *second_quarter = - (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE); + void *second_quarter = (void *)((uintptr_t)alloc + + HUGEPAGE_PAGES / 4 * PAGE); hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE); expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); @@ -175,21 +181,24 @@ TEST_BEGIN(test_purge_over_retained) { /* Purge the second quarter. 
*/ hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; - size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); + size_t nranges; + size_t to_purge_dirty = hpdata_purge_begin( + &hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); expect_zu_eq(1, nranges, "Second quarter only"); - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(second_quarter, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); @@ -209,16 +218,18 @@ TEST_BEGIN(test_purge_over_retained) { expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); expect_zu_eq(1, nranges, "Single range expected"); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + 
expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, ""); @@ -241,11 +252,9 @@ TEST_BEGIN(test_hugify) { } TEST_END -int main(void) { - return test_no_reentrancy( - test_reserve_alloc, - test_purge_simple, - test_purge_intervening_dalloc, - test_purge_over_retained, +int +main(void) { + return test_no_reentrancy(test_reserve_alloc, test_purge_simple, + test_purge_intervening_dalloc, test_purge_over_retained, test_hugify); } diff --git a/test/unit/huge.c b/test/unit/huge.c index 53f6577b..70abe4ac 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -8,38 +8,40 @@ const char *malloc_conf = "oversize_threshold:2097152"; TEST_BEGIN(huge_bind_thread) { unsigned arena1, arena2; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); /* Bind to a manual arena. */ expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, - sizeof(arena1)), 0, "Fail to bind thread"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena1, sizeof(arena1)), 0, + "Fail to bind thread"); void *ptr = mallocx(HUGE_SZ, 0); expect_ptr_not_null(ptr, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); /* Switch back to arena 0. 
*/ - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); arena2 = 0; - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, - sizeof(arena2)), 0, "Fail to bind thread"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena2, sizeof(arena2)), 0, + "Fail to bind thread"); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena2, 0, "Wrong arena used after binding"); dallocx(ptr, MALLOCX_TCACHE_NONE); /* Then huge allocation should use the huge arena. */ ptr = mallocx(HUGE_SZ, 0); expect_ptr_not_null(ptr, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_ne(arena2, 0, "Wrong arena used after binding"); expect_u_ne(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); @@ -48,25 +50,26 @@ TEST_END TEST_BEGIN(huge_mallocx) { unsigned arena1, arena2; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1)); expect_ptr_not_null(huge, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, - sizeof(huge)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, sizeof(huge)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena1, arena2, "Wrong arena used for mallocx"); dallocx(huge, MALLOCX_ARENA(arena1)); void *huge2 = mallocx(HUGE_SZ, 0); expect_ptr_not_null(huge, "Fail to allocate 
huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, - sizeof(huge2)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.lookup", &arena2, &sz, &huge2, sizeof(huge2)), 0, + "Unexpected mallctl() failure"); expect_u_ne(arena1, arena2, "Huge allocation should not come from the manual arena."); - expect_u_ne(arena2, 0, - "Huge allocation should not come from the arena 0."); + expect_u_ne( + arena2, 0, "Huge allocation should not come from the arena 0."); dallocx(huge2, 0); } TEST_END @@ -82,30 +85,27 @@ TEST_BEGIN(huge_allocation) { expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); ptr = mallocx(HUGE_SZ >> 1, 0); expect_ptr_not_null(ptr, "Fail to allocate half huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_ne(arena1, arena2, "Wrong arena used for half huge"); dallocx(ptr, 0); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); expect_ptr_not_null(ptr, "Fail to allocate small size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); - expect_u_ne(arena1, arena2, - "Huge and small should be from different arenas"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + expect_u_ne( + arena1, arena2, "Huge and small should be from different arenas"); dallocx(ptr, 0); } TEST_END int main(void) { - return test( - huge_allocation, - huge_mallocx, - huge_bind_thread); + return test(huge_allocation, huge_mallocx, huge_bind_thread); } diff --git a/test/unit/inspect.c b/test/unit/inspect.c index fe59e597..8111e4a5 100644 --- 
a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,27 +1,30 @@ #include "test/jemalloc_test.h" -#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ - "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content touched when given invalid arguments"); \ -} while (0) +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) \ + do { \ + assert_d_eq( \ + mallctl("experimental.utilization." node, a, b, c, d), \ + EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ + } while (0) -#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) -#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) -#define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - out, &out_sz, in, in_sz), 0, \ - "Should return 0 on correct arguments"); \ - expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content should be changed"); \ -} while (0) +#define TEST_UTIL_VALID(node) \ + do { \ + assert_d_eq(mallctl("experimental.utilization." 
node, out, \ + &out_sz, in, in_sz), \ + 0, "Should return 0 on correct arguments"); \ + expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ + } while (0) #define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") @@ -34,21 +37,19 @@ TEST_BEGIN(test_query) { * numerically unrelated to any size boundaries. */ for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { - void *p = mallocx(sz, 0); + sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { + void *p = mallocx(sz, 0); void **in = &p; size_t in_sz = sizeof(const void *); size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; - void *out = mallocx(out_sz, 0); - void *out_ref = mallocx(out_sz, 0); + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, - "test pointer allocation failed"); - assert_ptr_not_null(out, - "test output allocation failed"); - assert_ptr_not_null(out_ref, - "test reference output allocation failed"); + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(out, "test output allocation failed"); + assert_ptr_not_null( + out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) #define COUNTS(out) ((size_t *)((void **)out + 1)) @@ -64,21 +65,18 @@ TEST_BEGIN(test_query) { memcpy(out_ref, out, out_sz); /* Test invalid argument(s) errors */ - TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, NULL, in_sz, "newp is 
NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); in_sz -= 1; - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid newlen"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, in, in_sz, "invalid newlen"); in_sz += 1; out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid *oldlenp"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, in, in_sz, "invalid *oldlenp"); out_sz_ref = out_sz += 2 * sizeof(size_t); /* Examine output for valid call */ @@ -100,8 +98,9 @@ TEST_BEGIN(test_query) { "Extent region count exceeded size"); expect_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) - != NULL && SLABCUR_READ(out) <= p), + expect_true(NFREE_READ(out) == 0 + || (SLABCUR_READ(out) != NULL + && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { @@ -117,8 +116,8 @@ TEST_BEGIN(test_query) { BIN_NREGS_READ(out), "Extent region count exceeded " "bin region count"); - expect_zu_eq(BIN_NREGS_READ(out) - % NREGS_READ(out), 0, + expect_zu_eq( + BIN_NREGS_READ(out) % NREGS_READ(out), 0, "Bin region count isn't a multiple of " "extent region count"); expect_zu_le( @@ -171,10 +170,10 @@ TEST_BEGIN(test_batch) { * numerically unrelated to any size boundaries. */ for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) { - void *p = mallocx(sz, 0); - void *q = mallocx(sz, 0); - void *in[] = {p, q}; + sz += (sz <= SC_SMALL_MAXCLASS ? 
1019 : 99991)) { + void *p = mallocx(sz, 0); + void *q = mallocx(sz, 0); + void *in[] = {p, q}; size_t in_sz = sizeof(const void *) * 2; size_t out[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz = sizeof(size_t) * 6; @@ -185,17 +184,14 @@ TEST_BEGIN(test_batch) { assert_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ - TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL( + out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); in_sz -= 1; - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "newlen is not an exact multiple"); + TEST_UTIL_BATCH_EINVAL( + out, &out_sz, in, in_sz, "newlen is not an exact multiple"); in_sz += 1; out_sz_ref = out_sz -= 2 * sizeof(size_t); TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, @@ -206,8 +202,8 @@ TEST_BEGIN(test_batch) { "*oldlenp and newlen do not match"); in_sz += sizeof(const void *); - /* Examine output for valid calls */ -#define TEST_EQUAL_REF(i, message) \ + /* Examine output for valid calls */ +#define TEST_EQUAL_REF(i, message) \ assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] @@ -238,8 +234,8 @@ TEST_BEGIN(test_batch) { expect_zu_eq(NREGS_READ(out, 0), 1, "Extent region count should be one"); } - TEST_EQUAL_REF(1, - "Should not overwrite content beyond what's needed"); + TEST_EQUAL_REF( + 1, "Should not overwrite content beyond what's needed"); in_sz *= 2; out_sz_ref = out_sz *= 2; diff --git a/test/unit/junk.c b/test/unit/junk.c index 6c5b8beb..80f51e15 100644 --- a/test/unit/junk.c +++ 
b/test/unit/junk.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define arraylen(arr) (sizeof(arr)/sizeof(arr[0])) +#define arraylen(arr) (sizeof(arr) / sizeof(arr[0])) static size_t ptr_ind; static void *volatile ptrs[100]; -static void *last_junked_ptr; +static void *last_junked_ptr; static size_t last_junked_usize; static void @@ -21,17 +21,17 @@ test_junk(void *ptr, size_t usize) { static void do_allocs(size_t size, bool zero, size_t lg_align) { -#define JUNK_ALLOC(...) \ - do { \ - assert(ptr_ind + 1 < arraylen(ptrs)); \ - void *ptr = __VA_ARGS__; \ - assert_ptr_not_null(ptr, ""); \ - ptrs[ptr_ind++] = ptr; \ - if (opt_junk_alloc && !zero) { \ - expect_ptr_eq(ptr, last_junked_ptr, ""); \ - expect_zu_eq(last_junked_usize, \ - TEST_MALLOC_SIZE(ptr), ""); \ - } \ +#define JUNK_ALLOC(...) \ + do { \ + assert(ptr_ind + 1 < arraylen(ptrs)); \ + void *ptr = __VA_ARGS__; \ + assert_ptr_not_null(ptr, ""); \ + ptrs[ptr_ind++] = ptr; \ + if (opt_junk_alloc && !zero) { \ + expect_ptr_eq(ptr, last_junked_ptr, ""); \ + expect_zu_eq( \ + last_junked_usize, TEST_MALLOC_SIZE(ptr), ""); \ + } \ } while (0) if (!zero && lg_align == 0) { JUNK_ALLOC(malloc(size)); @@ -51,21 +51,20 @@ do_allocs(size_t size, bool zero, size_t lg_align) { #endif int zero_flag = zero ? 
MALLOCX_ZERO : 0; JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align))); - JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align) - | MALLOCX_TCACHE_NONE)); + JUNK_ALLOC(mallocx(size, + zero_flag | MALLOCX_LG_ALIGN(lg_align) | MALLOCX_TCACHE_NONE)); if (lg_align >= LG_SIZEOF_PTR) { void *memalign_result; - int err = posix_memalign(&memalign_result, (1 << lg_align), - size); + int err = posix_memalign( + &memalign_result, (1 << lg_align), size); assert_d_eq(err, 0, ""); JUNK_ALLOC(memalign_result); } } TEST_BEGIN(test_junk_alloc_free) { - bool zerovals[] = {false, true}; - size_t sizevals[] = { - 1, 8, 100, 1000, 100*1000 + bool zerovals[] = {false, true}; + size_t sizevals[] = {1, 8, 100, 1000, 100 * 1000 /* * Memory allocation failure is a real possibility in 32-bit mode. * Rather than try to check in the face of resource exhaustion, we just @@ -75,49 +74,49 @@ TEST_BEGIN(test_junk_alloc_free) { * mechanisms; but this is in fact the case. */ #if LG_SIZEOF_PTR == 3 - , 10 * 1000 * 1000 + , + 10 * 1000 * 1000 #endif }; - size_t lg_alignvals[] = { - 0, 4, 10, 15, 16, LG_PAGE + size_t lg_alignvals[] = {0, 4, 10, 15, 16, LG_PAGE #if LG_SIZEOF_PTR == 3 - , 20, 24 + , + 20, 24 #endif }; -#define JUNK_FREE(...) \ - do { \ - do_allocs(size, zero, lg_align); \ - for (size_t n = 0; n < ptr_ind; n++) { \ - void *ptr = ptrs[n]; \ - __VA_ARGS__; \ - if (opt_junk_free) { \ - assert_ptr_eq(ptr, last_junked_ptr, \ - ""); \ - assert_zu_eq(usize, last_junked_usize, \ - ""); \ - } \ - reset(); \ - } \ +#define JUNK_FREE(...) 
\ + do { \ + do_allocs(size, zero, lg_align); \ + for (size_t n = 0; n < ptr_ind; n++) { \ + void *ptr = ptrs[n]; \ + __VA_ARGS__; \ + if (opt_junk_free) { \ + assert_ptr_eq(ptr, last_junked_ptr, ""); \ + assert_zu_eq(usize, last_junked_usize, ""); \ + } \ + reset(); \ + } \ } while (0) for (size_t i = 0; i < arraylen(zerovals); i++) { for (size_t j = 0; j < arraylen(sizevals); j++) { for (size_t k = 0; k < arraylen(lg_alignvals); k++) { - bool zero = zerovals[i]; + bool zero = zerovals[i]; size_t size = sizevals[j]; size_t lg_align = lg_alignvals[k]; - size_t usize = nallocx(size, - MALLOCX_LG_ALIGN(lg_align)); + size_t usize = nallocx( + size, MALLOCX_LG_ALIGN(lg_align)); JUNK_FREE(free(ptr)); JUNK_FREE(dallocx(ptr, 0)); JUNK_FREE(dallocx(ptr, MALLOCX_TCACHE_NONE)); - JUNK_FREE(dallocx(ptr, MALLOCX_LG_ALIGN( - lg_align))); - JUNK_FREE(sdallocx(ptr, usize, MALLOCX_LG_ALIGN( - lg_align))); + JUNK_FREE( + dallocx(ptr, MALLOCX_LG_ALIGN(lg_align))); + JUNK_FREE(sdallocx( + ptr, usize, MALLOCX_LG_ALIGN(lg_align))); JUNK_FREE(sdallocx(ptr, usize, - MALLOCX_TCACHE_NONE | MALLOCX_LG_ALIGN(lg_align))); + MALLOCX_TCACHE_NONE + | MALLOCX_LG_ALIGN(lg_align))); if (opt_zero_realloc_action == zero_realloc_action_free) { JUNK_FREE(realloc(ptr, 0)); @@ -138,24 +137,24 @@ TEST_BEGIN(test_realloc_expand) { ptr = malloc(SC_SMALL_MAXCLASS); expanded = realloc(ptr, SC_LARGE_MINCLASS); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., 0) */ ptr = malloc(SC_SMALL_MAXCLASS); expanded = rallocx(ptr, SC_LARGE_MINCLASS, 0); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., 
nonzero) */ ptr = malloc(SC_SMALL_MAXCLASS); expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., MALLOCX_ZERO) */ @@ -189,7 +188,5 @@ main(void) { * We check the last pointer junked. If a reentrant call happens, that * might be an internal allocation. */ - return test_no_reentrancy( - test_junk_alloc_free, - test_realloc_expand); + return test_no_reentrancy(test_junk_alloc_free, test_realloc_expand); } diff --git a/test/unit/log.c b/test/unit/log.c index c09b5896..bf4ee1ff 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -18,16 +18,13 @@ expect_no_logging(const char *names) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l2) - count++; + log_do_begin(log_l2) count++; log_do_end(log_l2) - log_do_begin(log_l2_a) - count++; + log_do_begin(log_l2_a) count++; log_do_end(log_l2_a) } expect_d_eq(count, 0, "Disabled logging not ignored!"); @@ -57,8 +54,7 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) } expect_d_eq(count, 10, "Mis-logged!"); @@ -66,8 +62,7 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1.a"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) } expect_d_eq(count, 10, "Mis-logged!"); @@ -75,12 +70,10 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1.a|abc|l2|def"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) - log_do_begin(log_l2) - 
count++; + log_do_begin(log_l2) count++; log_do_end(log_l2) } expect_d_eq(count, 20, "Mis-logged!"); @@ -108,28 +101,22 @@ TEST_BEGIN(test_log_enabled_indirect) { /* 4 are on total, so should sum to 40. */ int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l1a) - count++; + log_do_begin(log_l1a) count++; log_do_end(log_l1a) - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) - log_do_begin(log_l2_a) - count++; + log_do_begin(log_l2_a) count++; log_do_end(log_l2_a) - log_do_begin(log_l2_b_a) - count++; + log_do_begin(log_l2_b_a) count++; log_do_end(log_l2_b_a) - log_do_begin(log_l2_b_b) - count++; + log_do_begin(log_l2_b_b) count++; log_do_end(log_l2_b_b) } @@ -147,12 +134,10 @@ TEST_BEGIN(test_log_enabled_global) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l2_a_a) - count++; + log_do_begin(log_l2_a_a) count++; log_do_end(log_l2_a_a) } expect_d_eq(count, 20, "Mis-logged!"); @@ -167,8 +152,7 @@ TEST_BEGIN(test_logs_if_no_init) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(l) - count++; + log_do_begin(l) count++; log_do_end(l) } expect_d_eq(count, 0, "Logging shouldn't happen if not initialized."); @@ -188,11 +172,7 @@ TEST_END int main(void) { - return test( - test_log_disabled, - test_log_enabled_direct, - test_log_enabled_indirect, - test_log_enabled_global, - test_logs_if_no_init, - test_log_only_format_string); + return test(test_log_disabled, test_log_enabled_direct, + test_log_enabled_indirect, test_log_enabled_global, + test_logs_if_no_init, test_log_only_format_string); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 838a4445..ac7506cf 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -6,26 +6,27 @@ TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; - size_t sz; + size_t sz; 
expect_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, "mallctl() should return ENOENT for non-existent names"); expect_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), - EPERM, "mallctl() should return EPERM on attempt to write " + EPERM, + "mallctl() should return EPERM on attempt to write " "read-only value"); - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)-1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)+1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) - 1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) + 1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); - sz = sizeof(epoch)-1; + sz = sizeof(epoch) - 1; expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); - sz = sizeof(epoch)+1; + sz = sizeof(epoch) + 1; expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } @@ -35,7 +36,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) { size_t mib[1]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, "mallctlnametomib() should return ENOENT for non-existent names"); } @@ -43,34 +44,38 @@ TEST_END TEST_BEGIN(test_mallctlbymib_errors) { uint64_t epoch; - size_t sz; - size_t mib[1]; - size_t miblen; + size_t sz; + size_t mib[1]; + size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("version", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - expect_d_eq(mallctlbymib(mib, 
miblen, NULL, NULL, "0.0.0", - strlen("0.0.0")), EPERM, "mallctl() should return EPERM on " + expect_d_eq( + mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", strlen("0.0.0")), + EPERM, + "mallctl() should return EPERM on " "attempt to write read-only value"); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, - sizeof(epoch)-1), EINVAL, + sizeof(epoch) - 1), + EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, - sizeof(epoch)+1), EINVAL, + sizeof(epoch) + 1), + EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - sz = sizeof(epoch)-1; + sz = sizeof(epoch) - 1; expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); - sz = sizeof(epoch)+1; + sz = sizeof(epoch) + 1; expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); @@ -79,7 +84,7 @@ TEST_END TEST_BEGIN(test_mallctl_read_write) { uint64_t old_epoch, new_epoch; - size_t sz = sizeof(old_epoch); + size_t sz = sizeof(old_epoch); /* Blind. */ expect_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, @@ -92,14 +97,15 @@ TEST_BEGIN(test_mallctl_read_write) { expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, - (void *)&new_epoch, sizeof(new_epoch)), 0, - "Unexpected mallctl() failure"); + (void *)&new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -133,10 +139,10 @@ TEST_BEGIN(test_mallctlnametomib_short_name) { TEST_END TEST_BEGIN(test_mallctlmibnametomib) { - size_t mib[4]; - size_t miblen = 4; + size_t mib[4]; + size_t miblen = 4; uint32_t result, result_ref; - size_t len_result = sizeof(uint32_t); + size_t len_result = sizeof(uint32_t); tsd_t *tsd = tsd_fetch(); @@ -178,20 +184,21 @@ TEST_BEGIN(test_mallctlmibnametomib) { /* Valid case. */ assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "nregs", &miblen), 0, ""); assert_zu_eq(miblen, 4, ""); - assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), - 0, "Unexpected mallctlbymib() failure"); - assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + assert_d_eq( + mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0, + "Unexpected mallctl() failure"); expect_zu_eq(result, result_ref, "mallctlbymib() and mallctl() returned different result"); } TEST_END TEST_BEGIN(test_mallctlbymibname) { - size_t mib[4]; - size_t miblen = 4; + size_t mib[4]; + size_t miblen = 4; uint32_t result, result_ref; - size_t len_result = sizeof(uint32_t); + size_t len_result = sizeof(uint32_t); tsd_t *tsd = tsd_fetch(); @@ -202,50 +209,60 @@ TEST_BEGIN(test_mallctlbymibname) { assert_zu_eq(miblen, 1, ""); miblen = 4; - assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, - &result, &len_result, NULL, 0), ENOENT, ""); + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, &result, + &len_result, NULL, 0), + ENOENT, ""); miblen = 4; - assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", 
&miblen, - &result, &len_result, NULL, 0), ENOENT, ""); + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen, &result, + &len_result, NULL, 0), + ENOENT, ""); assert_zu_eq(miblen, 4, ""); /* Valid cases. */ - assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq( + mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0, + "Unexpected mallctl() failure"); miblen = 4; assert_d_eq(ctl_bymibname(tsd, mib, 0, "arenas.bin.0.nregs", &miblen, - &result, &len_result, NULL, 0), 0, ""); + &result, &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 2, "0.nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 3, "nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); } TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config, t) do { \ - t oldval; \ - size_t sz = sizeof(oldval); \ - expect_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_b_eq(oldval, config_##config, "Incorrect config value"); \ - expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ -} while (0) +#define TEST_MALLCTL_CONFIG(config, t) \ + do { \ + t oldval; \ + size_t sz = sizeof(oldval); \ + expect_d_eq( \ + mallctl("config." 
#config, (void *)&oldval, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_b_eq( \ + oldval, config_##config, "Incorrect config value"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + } while (0) TEST_MALLCTL_CONFIG(cache_oblivious, bool); TEST_MALLCTL_CONFIG(debug, bool); @@ -267,16 +284,17 @@ TEST_END TEST_BEGIN(test_mallctl_opt) { bool config_always = true; -#define TEST_MALLCTL_OPT(t, opt, config) do { \ - t oldval; \ - size_t sz = sizeof(oldval); \ - int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ - 0); \ - expect_d_eq(result, expected, \ - "Unexpected mallctl() result for opt."#opt); \ - expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ -} while (0) +#define TEST_MALLCTL_OPT(t, opt, config) \ + do { \ + t oldval; \ + size_t sz = sizeof(oldval); \ + int expected = config_##config ? 0 : ENOENT; \ + int result = mallctl( \ + "opt." #opt, (void *)&oldval, &sz, NULL, 0); \ + expect_d_eq(result, expected, \ + "Unexpected mallctl() result for opt." #opt); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + } while (0) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); @@ -341,8 +359,8 @@ TEST_END TEST_BEGIN(test_manpage_example) { unsigned nbins, i; - size_t mib[4]; - size_t len, miblen; + size_t mib[4]; + size_t len, miblen; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, @@ -356,8 +374,9 @@ TEST_BEGIN(test_manpage_example) { mib[2] = i; len = sizeof(bin_size); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... 
*/ } } @@ -380,8 +399,8 @@ TEST_BEGIN(test_tcache_none) { void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); if (!opt_prof && !san_uaf_detection_enabled()) { - expect_ptr_eq(p0, p1, - "Expected tcache to allocate cached region"); + expect_ptr_eq( + p0, p1, "Expected tcache to allocate cached region"); } /* Clean up. */ @@ -390,12 +409,12 @@ TEST_BEGIN(test_tcache_none) { TEST_END TEST_BEGIN(test_tcache) { -#define NTCACHES 10 +#define NTCACHES 10 unsigned tis[NTCACHES]; - void *ps[NTCACHES]; - void *qs[NTCACHES]; + void *ps[NTCACHES]; + void *qs[NTCACHES]; unsigned i; - size_t sz, psz, qsz; + size_t sz, psz, qsz; psz = 42; qsz = nallocx(psz, 0) + 1; @@ -403,39 +422,41 @@ TEST_BEGIN(test_tcache) { /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + expect_d_eq( + mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + (void *)&tis[i], sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + expect_d_eq( + mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } /* Cache some allocations. 
*/ for (i = 0; i < NTCACHES; i++) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + ps[i], "Unexpected mallocx() failure, i=%u", i); dallocx(ps[i], MALLOCX_TCACHE(tis[i])); qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + qs[i], "Unexpected mallocx() failure, i=%u", i); dallocx(qs[i], MALLOCX_TCACHE(tis[i])); } @@ -443,11 +464,13 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *p0 = ps[i]; ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + ps[i], "Unexpected mallocx() failure, i=%u", i); if (!san_uaf_detection_enabled()) { - expect_ptr_eq(ps[i], p0, "Expected mallocx() to " - "allocate cached region, i=%u", i); + expect_ptr_eq(ps[i], p0, + "Expected mallocx() to " + "allocate cached region, i=%u", + i); } } @@ -455,11 +478,13 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *q0 = qs[i]; qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", - i); + expect_ptr_not_null( + qs[i], "Unexpected rallocx() failure, i=%u", i); if (!san_uaf_detection_enabled()) { - expect_ptr_eq(qs[i], q0, "Expected rallocx() to " - "allocate cached region, i=%u", i); + expect_ptr_eq(qs[i], q0, + "Expected rallocx() to " + "allocate cached region, i=%u", + i); } /* Avoid undefined behavior in case of test failure. */ if (qs[i] == NULL) { @@ -471,17 +496,17 @@ TEST_BEGIN(test_tcache) { } /* Flush some non-empty tcaches. 
*/ - for (i = 0; i < NTCACHES/2; i++) { + for (i = 0; i < NTCACHES / 2; i++) { expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + (void *)&tis[i], sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } } TEST_END @@ -490,7 +515,7 @@ TEST_BEGIN(test_thread_arena) { unsigned old_arena_ind, new_arena_ind, narenas; const char *opa; - size_t sz = sizeof(opa); + size_t sz = sizeof(opa); expect_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -505,20 +530,23 @@ TEST_BEGIN(test_thread_arena) { if (strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&new_arena_ind, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + (void *)&new_arena_ind, sizeof(unsigned)), + 0, "Unexpected mallctl() failure"); new_arena_ind = 0; expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&new_arena_ind, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + (void *)&new_arena_ind, sizeof(unsigned)), + 0, "Unexpected mallctl() failure"); } else { expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + NULL, 0), + 0, "Unexpected mallctl() failure"); new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { - expect_d_eq(mallctl("thread.arena", - (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, - sizeof(unsigned)), EPERM, "thread.arena ctl " + expect_d_eq( + mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&new_arena_ind, sizeof(unsigned)), + EPERM, + "thread.arena 
ctl " "should not be allowed with percpu arena"); } } @@ -527,10 +555,10 @@ TEST_END TEST_BEGIN(test_arena_i_initialized) { unsigned narenas, i; - size_t sz; - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - bool initialized; + size_t sz; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + bool initialized; sz = sizeof(narenas); expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), @@ -541,8 +569,9 @@ TEST_BEGIN(test_arena_i_initialized) { for (i = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(initialized); - expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); } mib[1] = MALLCTL_ARENAS_ALL; @@ -554,10 +583,10 @@ TEST_BEGIN(test_arena_i_initialized) { /* Equivalent to the above but using mallctl() directly. */ sz = sizeof(initialized); - expect_d_eq(mallctl( - "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", - (void *)&initialized, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", + (void *)&initialized, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); expect_true(initialized, "Merged arena statistics should always be initialized"); } @@ -565,30 +594,31 @@ TEST_END TEST_BEGIN(test_arena_i_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arena.0.dirty_decay_ms", - (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, - dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arena.0.dirty_decay_ms", - (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_dirty_decay_ms, &sz, + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arena.0.dirty_decay_ms"); } @@ -597,30 +627,31 @@ TEST_END TEST_BEGIN(test_arena_i_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); 
expect_d_eq(mallctl("arena.0.muzzy_decay_ms", - (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, - muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", - (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_muzzy_decay_ms, &sz, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arena.0.muzzy_decay_ms"); } @@ -629,9 +660,9 @@ TEST_END TEST_BEGIN(test_arena_i_purge) { unsigned narenas; - size_t sz = sizeof(unsigned); - size_t mib[3]; - size_t miblen = 3; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -652,9 +683,9 @@ TEST_END TEST_BEGIN(test_arena_i_decay) { unsigned narenas; - size_t sz = sizeof(unsigned); - size_t mib[3]; - size_t miblen = 3; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ 
-675,86 +706,89 @@ TEST_END TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; - size_t sz = sizeof(dss_prec_old); - size_t mib[3]; - size_t miblen; + size_t sz = sizeof(dss_prec_old); + size_t mib[3]; + size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected default for dss precedence"); + (void *)&dss_prec_new, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected default for dss precedence"); expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, - "Unexpected mallctl() failure"); + (void *)&dss_prec_old, sizeof(dss_prec_old)), + 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected value for dss precedence"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected default for dss precedence"); + (void *)&dss_prec_new, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected default for dss precedence"); expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); + (void *)&dss_prec_old, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected value for dss precedence"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected value for dss precedence"); } TEST_END TEST_BEGIN(test_arena_i_name) { - unsigned arena_ind; - size_t ind_sz = sizeof(arena_ind); - size_t mib[3]; - size_t miblen; - char name_old[ARENA_NAME_LEN]; - char *name_oldp = name_old; - size_t sz = sizeof(name_oldp); - char default_name[ARENA_NAME_LEN]; + unsigned arena_ind; + size_t ind_sz = sizeof(arena_ind); + size_t mib[3]; + size_t miblen; + char name_old[ARENA_NAME_LEN]; + char *name_oldp = name_old; + size_t sz = sizeof(name_oldp); + char default_name[ARENA_NAME_LEN]; const char *name_new = "test name"; const char *super_long_name = "A name longer than ARENA_NAME_LEN"; - size_t super_long_name_len = strlen(super_long_name); + size_t super_long_name_len = strlen(super_long_name); assert(super_long_name_len > ARENA_NAME_LEN); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.name", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, 0), 0, + "Unexpected mallctl() failure"); mib[1] = arena_ind; - malloc_snprintf(default_name, sizeof(default_name), "manual_%u", - arena_ind); + malloc_snprintf( + default_name, sizeof(default_name), "manual_%u", arena_ind); expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&name_oldp, &sz, - (void *)&name_new, sizeof(name_new)), 0, - "Unexpected mallctl() failure"); - expect_str_eq(name_old, default_name, - "Unexpected default value for arena name"); + (void *)&name_new, sizeof(name_new)), + 0, "Unexpected mallctl() failure"); + expect_str_eq( + name_old, default_name, "Unexpected default value for arena name"); expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, - (void *)&super_long_name, sizeof(super_long_name)), 0, - "Unexpected mallctl() failure"); + (void *)&super_long_name, sizeof(super_long_name)), + 0, "Unexpected mallctl() failure"); expect_str_eq(name_old, name_new, "Unexpected value for arena name"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); int cmp = strncmp(name_old, super_long_name, ARENA_NAME_LEN - 1); expect_true(cmp == 0, "Unexpected value for long arena name "); } @@ -765,14 +799,14 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { size_t mib[3]; size_t miblen; - bool retain_enabled; + bool retain_enabled; size_t sz = sizeof(retain_enabled); - expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); test_skip_if(!retain_enabled); sz = sizeof(default_limit); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); @@ -782,58 +816,62 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), EFAULT, "Unexpected mallctl() success"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, 
&new_limit, sizeof(new_limit)), + EFAULT, "Unexpected mallctl() success"); new_limit = PAGE + 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), + 0, "Unexpected mallctl() failure"); expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_zu_eq(old_limit, PAGE, - "Unexpected value for retain_grow_limit"); + expect_zu_eq(old_limit, PAGE, "Unexpected value for retain_grow_limit"); /* Expect grow less than psize class 10. */ new_limit = sz_pind2sz(10) - 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), + 0, "Unexpected mallctl() failure"); expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_zu_eq(old_limit, sz_pind2sz(9), - "Unexpected value for retain_grow_limit"); + expect_zu_eq( + old_limit, sz_pind2sz(9), "Unexpected value for retain_grow_limit"); /* Restore to default. 
*/ expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, - sizeof(default_limit)), 0, "Unexpected mallctl() failure"); + sizeof(default_limit)), + 0, "Unexpected mallctl() failure"); } TEST_END TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arenas.dirty_decay_ms", - (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, - "Expected mallctl() failure"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Expected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, - dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arenas.dirty_decay_ms", - (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_dirty_decay_ms, &sz, + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arenas.dirty_decay_ms"); } @@ -842,30 +880,31 @@ TEST_END TEST_BEGIN(test_arenas_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arenas.muzzy_decay_ms", - (void 
*)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, - "Expected mallctl() failure"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Expected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, - muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arenas.muzzy_decay_ms", - (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_muzzy_decay_ms, &sz, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arenas.muzzy_decay_ms"); } @@ -873,13 +912,15 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { TEST_END TEST_BEGIN(test_arenas_constants) { -#define TEST_ARENAS_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq( \ + mallctl("arenas." 
#name, (void *)&name, &sz, NULL, 0), 0, \ + "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); @@ -892,18 +933,19 @@ TEST_BEGIN(test_arenas_constants) { TEST_END TEST_BEGIN(test_arenas_bin_constants) { -#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("arenas.bin.0." #name, (void *)&name, &sz, \ + NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs); - TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, - bin_infos[0].slab_size); + TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, bin_infos[0].slab_size); TEST_ARENAS_BIN_CONSTANT(uint32_t, nshards, bin_infos[0].n_shards); #undef TEST_ARENAS_BIN_CONSTANT @@ -911,16 +953,17 @@ TEST_BEGIN(test_arenas_bin_constants) { TEST_END TEST_BEGIN(test_arenas_lextent_constants) { -#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("arenas.lextent.0." 
#name, (void *)&name, \ + &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) - TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, - SC_LARGE_MINCLASS); + TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, SC_LARGE_MINCLASS); #undef TEST_ARENAS_LEXTENT_CONSTANT } @@ -928,25 +971,27 @@ TEST_END TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); - expect_u_eq(narenas_before+1, narenas_after, + expect_u_eq(narenas_before + 1, narenas_after, "Unexpected number of arenas before versus after extension"); - expect_u_eq(arena, narenas_after-1, "Unexpected arena index"); + expect_u_eq(arena, narenas_after - 1, "Unexpected arena index"); } TEST_END TEST_BEGIN(test_arenas_lookup) { unsigned arena, arena1; - void *ptr; - size_t sz = sizeof(unsigned); + void *ptr; + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -967,7 +1012,7 @@ TEST_BEGIN(test_prof_active) { test_skip_if(!config_prof); test_skip_if(opt_prof); - bool active, old; + bool active, old; size_t len = sizeof(bool); active = true; @@ -987,12 +1032,14 @@ TEST_BEGIN(test_prof_active) { TEST_END TEST_BEGIN(test_stats_arenas) { -#define TEST_STATS_ARENAS(t, name) do { \ - t name; \ - size_t sz = sizeof(t); \ 
- expect_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS(t, name) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0." #name, (void *)&name, \ + &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); @@ -1008,13 +1055,14 @@ TEST_END TEST_BEGIN(test_stats_arenas_hpa_shard_counters) { test_skip_if(!config_stats); -#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#name, \ - (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0.hpa_shard." #name, \ + (void *)&name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, npageslabs); TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, nactive); @@ -1031,19 +1079,22 @@ TEST_END TEST_BEGIN(test_stats_arenas_hpa_shard_slabs) { test_skip_if(!config_stats); -#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) do { \ - t slab##_##name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#slab"."#name, \ - (void *)&slab##_##name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) \ + do { \ + t slab##_##name; \ + size_t sz = sizeof(t); \ + expect_d_eq( \ + mallctl("stats.arenas.0.hpa_shard." #slab "." 
#name, \ + (void *)&slab##_##name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) -#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) do { \ - TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, \ - name##_##nonhuge); \ - TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) \ + do { \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN( \ + t, slab, name##_##nonhuge); \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ + } while (0) TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, npageslabs); TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, nactive); @@ -1069,18 +1120,18 @@ alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result, } static void -dalloc_hook(void *extra, UNUSED hook_dalloc_t type, - UNUSED void *address, UNUSED uintptr_t args_raw[3]) { +dalloc_hook(void *extra, UNUSED hook_dalloc_t type, UNUSED void *address, + UNUSED uintptr_t args_raw[3]) { *(bool *)extra = true; } TEST_BEGIN(test_hooks) { - bool hook_called = false; + bool hook_called = false; hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; - void *handle = NULL; - size_t sz = sizeof(handle); - int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + void *handle = NULL; + size_t sz = sizeof(handle); + int err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, 0, "Hook installation failed"); expect_ptr_ne(handle, NULL, "Hook installation gave null handle"); void *ptr = mallocx(1, 0); @@ -1089,8 +1140,8 @@ TEST_BEGIN(test_hooks) { free(ptr); expect_true(hook_called, "Free hook not called"); - err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, - sizeof(handle)); + err = mallctl( + "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); expect_d_eq(err, 0, "Hook removal failed"); hook_called = false; ptr = mallocx(1, 0); @@ -1100,13 +1151,13 @@ 
TEST_BEGIN(test_hooks) { TEST_END TEST_BEGIN(test_hooks_exhaustion) { - bool hook_called = false; + bool hook_called = false; hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; - void *handle; - void *handles[HOOK_MAX]; + void *handle; + void *handles[HOOK_MAX]; size_t sz = sizeof(handle); - int err; + int err; for (int i = 0; i < HOOK_MAX; i++) { handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, @@ -1115,8 +1166,8 @@ TEST_BEGIN(test_hooks_exhaustion) { expect_ptr_ne(handle, NULL, "Got NULL handle"); handles[i] = handle; } - err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, EAGAIN, "Should have failed hook installation"); for (int i = 0; i < HOOK_MAX; i++) { err = mallctl("experimental.hooks.remove", NULL, NULL, @@ -1125,12 +1176,12 @@ TEST_BEGIN(test_hooks_exhaustion) { } /* Insertion failed, but then we removed some; it should work now. 
*/ handle = NULL; - err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, 0, "Hook insertion failed"); expect_ptr_ne(handle, NULL, "Got NULL handle"); - err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, - sizeof(handle)); + err = mallctl( + "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); expect_d_eq(err, 0, "Hook removal failed"); } TEST_END @@ -1144,7 +1195,7 @@ TEST_BEGIN(test_thread_idle) { */ test_skip_if(!config_stats); - int err; + int err; size_t sz; size_t miblen; @@ -1164,14 +1215,15 @@ TEST_BEGIN(test_thread_idle) { sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - err = mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); + err = mallctl( + "thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); expect_d_eq(err, 0, "Unexpected mallctl() failure"); err = mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); expect_d_eq(err, 0, "Unexpected mallctl() failure"); /* We're going to do an allocation of size 1, which we know is small. 
*/ size_t mib[5]; - miblen = sizeof(mib)/sizeof(mib[0]); + miblen = sizeof(mib) / sizeof(mib[0]); err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); expect_d_eq(err, 0, ""); mib[2] = arena_ind; @@ -1220,9 +1272,9 @@ TEST_BEGIN(test_thread_peak) { size_t big_size = 10 * 1024 * 1024; size_t small_size = 256; - void *ptr; - int err; - size_t sz; + void *ptr; + int err; + size_t sz; uint64_t peak; sz = sizeof(uint64_t); @@ -1293,9 +1345,9 @@ TEST_BEGIN(test_thread_activity_callback) { test_skip_if(!config_stats); const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; - size_t sz; + void *ptr; + int err; + size_t sz; uint64_t *allocatedp; uint64_t *deallocatedp; @@ -1305,12 +1357,12 @@ TEST_BEGIN(test_thread_activity_callback) { err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); assert_d_eq(0, err, ""); - activity_callback_thunk_t old_thunk = {(activity_callback_t)111, - (void *)222}; + activity_callback_thunk_t old_thunk = { + (activity_callback_t)111, (void *)222}; - activity_test_data_t test_data = {333, 444}; - activity_callback_thunk_t new_thunk = - {&activity_test_callback, &test_data}; + activity_test_data_t test_data = {333, 444}; + activity_callback_thunk_t new_thunk = { + &activity_test_callback, &test_data}; sz = sizeof(old_thunk); err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, @@ -1329,7 +1381,7 @@ TEST_BEGIN(test_thread_activity_callback) { expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); sz = sizeof(old_thunk); - new_thunk = (activity_callback_thunk_t){ NULL, NULL }; + new_thunk = (activity_callback_thunk_t){NULL, NULL}; err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, &new_thunk, sizeof(new_thunk)); assert_d_eq(0, err, ""); @@ -1347,8 +1399,6 @@ TEST_BEGIN(test_thread_activity_callback) { } TEST_END - - static unsigned nuser_thread_event_cb_calls; static void user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { @@ -1357,25 
+1407,25 @@ user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { ++nuser_thread_event_cb_calls; } static user_hook_object_t user_te_obj = { - .callback = user_thread_event_cb, - .interval = 100, - .is_alloc_only = false, + .callback = user_thread_event_cb, + .interval = 100, + .is_alloc_only = false, }; TEST_BEGIN(test_thread_event_hook) { const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; + void *ptr; + int err; unsigned current_calls = nuser_thread_event_cb_calls; - err = mallctl("experimental.hooks.thread_event", NULL, 0, - &user_te_obj, sizeof(user_te_obj)); + err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj, + sizeof(user_te_obj)); assert_d_eq(0, err, ""); - err = mallctl("experimental.hooks.thread_event", NULL, 0, - &user_te_obj, sizeof(user_te_obj)); - assert_d_eq(0, err, "Not an error to provide object with same interval and cb"); - + err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj, + sizeof(user_te_obj)); + assert_d_eq( + 0, err, "Not an error to provide object with same interval and cb"); ptr = mallocx(big_size, 0); free(ptr); @@ -1383,47 +1433,23 @@ TEST_BEGIN(test_thread_event_hook) { } TEST_END - int main(void) { - return test( - test_mallctl_errors, - test_mallctlnametomib_errors, - test_mallctlbymib_errors, - test_mallctl_read_write, - test_mallctlnametomib_short_mib, - test_mallctlnametomib_short_name, - test_mallctlmibnametomib, - test_mallctlbymibname, - test_mallctl_config, - test_mallctl_opt, - test_manpage_example, - test_tcache_none, - test_tcache, - test_thread_arena, - test_arena_i_initialized, - test_arena_i_dirty_decay_ms, - test_arena_i_muzzy_decay_ms, - test_arena_i_purge, - test_arena_i_decay, - test_arena_i_dss, - test_arena_i_name, - test_arena_i_retain_grow_limit, - test_arenas_dirty_decay_ms, - test_arenas_muzzy_decay_ms, - test_arenas_constants, - test_arenas_bin_constants, - test_arenas_lextent_constants, - test_arenas_create, - 
test_arenas_lookup, - test_prof_active, - test_stats_arenas, + return test(test_mallctl_errors, test_mallctlnametomib_errors, + test_mallctlbymib_errors, test_mallctl_read_write, + test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, + test_mallctlmibnametomib, test_mallctlbymibname, + test_mallctl_config, test_mallctl_opt, test_manpage_example, + test_tcache_none, test_tcache, test_thread_arena, + test_arena_i_initialized, test_arena_i_dirty_decay_ms, + test_arena_i_muzzy_decay_ms, test_arena_i_purge, test_arena_i_decay, + test_arena_i_dss, test_arena_i_name, test_arena_i_retain_grow_limit, + test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, + test_arenas_constants, test_arenas_bin_constants, + test_arenas_lextent_constants, test_arenas_create, + test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, - test_stats_arenas_hpa_shard_slabs, - test_hooks, - test_hooks_exhaustion, - test_thread_idle, - test_thread_peak, - test_thread_activity_callback, - test_thread_event_hook); + test_stats_arenas_hpa_shard_slabs, test_hooks, + test_hooks_exhaustion, test_thread_idle, test_thread_peak, + test_thread_activity_callback, test_thread_event_hook); } diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index 9d2c6077..023b7102 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -13,12 +13,12 @@ TEST_BEGIN(test_malloc_conf_2) { test_skip_if(windows); ssize_t dirty_decay_ms; - size_t sz = sizeof(dirty_decay_ms); + size_t sz = sizeof(dirty_decay_ms); int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); assert_d_eq(err, 0, "Unexpected mallctl failure"); - expect_zd_eq(dirty_decay_ms, 1234, - "malloc_conf_2 setting didn't take effect"); + expect_zd_eq( + dirty_decay_ms, 1234, "malloc_conf_2 setting didn't take effect"); } TEST_END @@ -32,22 +32,24 @@ TEST_BEGIN(test_mallctl_global_var) { test_skip_if(windows); const char *mc; - size_t sz = sizeof(mc); - 
expect_d_eq(mallctl("opt.malloc_conf.global_var", - (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_str_eq(mc, malloc_conf, "Unexpected value for the global variable " + size_t sz = sizeof(mc); + expect_d_eq( + mallctl("opt.malloc_conf.global_var", (void *)&mc, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf, + "Unexpected value for the global variable " "malloc_conf"); expect_d_eq(mallctl("opt.malloc_conf.global_var_2_conf_harder", - (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_str_eq(mc, malloc_conf_2_conf_harder, "Unexpected value for the " + (void *)&mc, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf_2_conf_harder, + "Unexpected value for the " "global variable malloc_conf_2_conf_harder"); } TEST_END int main(void) { - return test( - test_malloc_conf_2, - test_mallctl_global_var); + return test(test_malloc_conf_2, test_mallctl_global_var); } diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 385f7450..f7895945 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -14,77 +14,68 @@ TEST_BEGIN(test_malloc_strtoumax) { struct test_s { const char *input; const char *expected_remainder; - int base; - int expected_errno; + int base; + int expected_errno; const char *expected_errno_name; - uintmax_t expected_x; + uintmax_t expected_x; }; -#define ERR(e) e, #e -#define KUMAX(x) ((uintmax_t)x##ULL) -#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) - struct test_s tests[] = { - {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, - {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, - {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX}, +#define ERR(e) e, #e +#define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) + struct test_s tests[] = {{"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX}, - {"", "", 0, ERR(EINVAL), UINTMAX_MAX}, - {"+", "+", 0, 
ERR(EINVAL), UINTMAX_MAX}, - {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX}, - {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, + {"", "", 0, ERR(EINVAL), UINTMAX_MAX}, + {"+", "+", 0, ERR(EINVAL), UINTMAX_MAX}, + {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX}, + {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, - {"42", "", 0, ERR(0), KUMAX(42)}, - {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KSMAX(-42)}, - {"042", "", 0, ERR(0), KUMAX(042)}, - {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KSMAX(-042)}, - {"0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, + {"-42", "", 0, ERR(0), KSMAX(-42)}, + {"042", "", 0, ERR(0), KUMAX(042)}, + {"+042", "", 0, ERR(0), KUMAX(042)}, + {"-042", "", 0, ERR(0), KSMAX(-042)}, + {"0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, - {"0", "", 0, ERR(0), KUMAX(0)}, - {"1", "", 0, ERR(0), KUMAX(1)}, + {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, - {"42", "", 0, ERR(0), KUMAX(42)}, - {" 42", "", 0, ERR(0), KUMAX(42)}, - {"42 ", " ", 0, ERR(0), KUMAX(42)}, - {"0x", "x", 0, ERR(0), KUMAX(0)}, - {"42x", "x", 0, ERR(0), KUMAX(42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, {" 42", "", 0, ERR(0), KUMAX(42)}, + {"42 ", " ", 0, ERR(0), KUMAX(42)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, + {"42x", "x", 0, ERR(0), KUMAX(42)}, - {"07", "", 0, ERR(0), KUMAX(7)}, - {"010", "", 0, ERR(0), KUMAX(8)}, - {"08", "8", 0, ERR(0), KUMAX(0)}, - {"0_", "_", 0, ERR(0), KUMAX(0)}, + {"07", "", 0, ERR(0), KUMAX(7)}, {"010", "", 0, ERR(0), KUMAX(8)}, + {"08", "8", 0, ERR(0), KUMAX(0)}, {"0_", "_", 0, ERR(0), KUMAX(0)}, - {"0x", "x", 0, ERR(0), KUMAX(0)}, - {"0X", "X", 0, ERR(0), KUMAX(0)}, - {"0xg", "xg", 0, ERR(0), KUMAX(0)}, - {"0XA", "", 0, ERR(0), KUMAX(10)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, {"0X", "X", 0, ERR(0), KUMAX(0)}, + 
{"0xg", "xg", 0, ERR(0), KUMAX(0)}, + {"0XA", "", 0, ERR(0), KUMAX(10)}, - {"010", "", 10, ERR(0), KUMAX(10)}, - {"0x3", "x3", 10, ERR(0), KUMAX(0)}, + {"010", "", 10, ERR(0), KUMAX(10)}, + {"0x3", "x3", 10, ERR(0), KUMAX(0)}, - {"12", "2", 2, ERR(0), KUMAX(1)}, - {"78", "8", 8, ERR(0), KUMAX(7)}, - {"9a", "a", 10, ERR(0), KUMAX(9)}, - {"9A", "A", 10, ERR(0), KUMAX(9)}, - {"fg", "g", 16, ERR(0), KUMAX(15)}, - {"FG", "G", 16, ERR(0), KUMAX(15)}, - {"0xfg", "g", 16, ERR(0), KUMAX(15)}, - {"0XFG", "G", 16, ERR(0), KUMAX(15)}, - {"z_", "_", 36, ERR(0), KUMAX(35)}, - {"Z_", "_", 36, ERR(0), KUMAX(35)} - }; + {"12", "2", 2, ERR(0), KUMAX(1)}, {"78", "8", 8, ERR(0), KUMAX(7)}, + {"9a", "a", 10, ERR(0), KUMAX(9)}, + {"9A", "A", 10, ERR(0), KUMAX(9)}, + {"fg", "g", 16, ERR(0), KUMAX(15)}, + {"FG", "G", 16, ERR(0), KUMAX(15)}, + {"0xfg", "g", 16, ERR(0), KUMAX(15)}, + {"0XFG", "G", 16, ERR(0), KUMAX(15)}, + {"z_", "_", 36, ERR(0), KUMAX(35)}, + {"Z_", "_", 36, ERR(0), KUMAX(35)}}; #undef ERR #undef KUMAX #undef KSMAX unsigned i; - for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { + for (i = 0; i < sizeof(tests) / sizeof(struct test_s); i++) { struct test_s *test = &tests[i]; - int err; - uintmax_t result; - char *remainder; + int err; + uintmax_t result; + char *remainder; set_errno(0); result = malloc_strtoumax(test->input, &remainder, test->base); @@ -93,8 +84,8 @@ TEST_BEGIN(test_malloc_strtoumax) { "Expected errno %s for \"%s\", base %d", test->expected_errno_name, test->input, test->base); expect_str_eq(remainder, test->expected_remainder, - "Unexpected remainder for \"%s\", base %d", - test->input, test->base); + "Unexpected remainder for \"%s\", base %d", test->input, + test->base); if (err == 0) { expect_ju_eq(result, test->expected_x, "Unexpected result for \"%s\", base %d", @@ -105,31 +96,32 @@ TEST_BEGIN(test_malloc_strtoumax) { TEST_END TEST_BEGIN(test_malloc_snprintf_truncated) { -#define BUFLEN 15 - char buf[BUFLEN]; +#define BUFLEN 15 + char 
buf[BUFLEN]; size_t result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ - result = malloc_snprintf(buf, len, __VA_ARGS__); \ - expect_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ - "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ - expect_zu_eq(result, strlen(expected_str_untruncated), \ - "Unexpected result"); \ -} while (0) +#define TEST(expected_str_untruncated, ...) \ + do { \ + result = malloc_snprintf(buf, len, __VA_ARGS__); \ + expect_d_eq(strncmp(buf, expected_str_untruncated, len - 1), \ + 0, "Unexpected string inequality (\"%s\" vs \"%s\")", buf, \ + expected_str_untruncated); \ + expect_zu_eq(result, strlen(expected_str_untruncated), \ + "Unexpected result"); \ + } while (0) for (len = 1; len < BUFLEN; len++) { - TEST("012346789", "012346789"); - TEST("a0123b", "a%sb", "0123"); - TEST("a01234567", "a%s%s", "0123", "4567"); - TEST("a0123 ", "a%-6s", "0123"); - TEST("a 0123", "a%6s", "0123"); - TEST("a 012", "a%6.3s", "0123"); - TEST("a 012", "a%*.*s", 6, 3, "0123"); - TEST("a 123b", "a% db", 123); - TEST("a123b", "a%-db", 123); - TEST("a-123b", "a%-db", -123); - TEST("a+123b", "a%+db", 123); + TEST("012346789", "012346789"); + TEST("a0123b", "a%sb", "0123"); + TEST("a01234567", "a%s%s", "0123", "4567"); + TEST("a0123 ", "a%-6s", "0123"); + TEST("a 0123", "a%6s", "0123"); + TEST("a 012", "a%6.3s", "0123"); + TEST("a 012", "a%*.*s", 6, 3, "0123"); + TEST("a 123b", "a% db", 123); + TEST("a123b", "a%-db", 123); + TEST("a-123b", "a%-db", -123); + TEST("a+123b", "a%+db", 123); } #undef BUFLEN #undef TEST @@ -137,14 +129,16 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { TEST_END TEST_BEGIN(test_malloc_snprintf) { -#define BUFLEN 128 - char buf[BUFLEN]; +#define BUFLEN 128 + char buf[BUFLEN]; size_t result; -#define TEST(expected_str, ...) 
do { \ - result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ - expect_str_eq(buf, expected_str, "Unexpected output"); \ - expect_zu_eq(result, strlen(expected_str), "Unexpected result");\ -} while (0) +#define TEST(expected_str, ...) \ + do { \ + result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ + expect_str_eq(buf, expected_str, "Unexpected output"); \ + expect_zu_eq( \ + result, strlen(expected_str), "Unexpected result"); \ + } while (0) TEST("hello", "hello"); @@ -260,9 +254,6 @@ TEST_END int main(void) { - return test( - test_malloc_strtoumax_no_endptr, - test_malloc_strtoumax, - test_malloc_snprintf_truncated, - test_malloc_snprintf); + return test(test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, + test_malloc_snprintf_truncated, test_malloc_snprintf); } diff --git a/test/unit/math.c b/test/unit/math.c index a32767c5..b0994768 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -6,11 +6,11 @@ #include #ifdef __PGI -#undef INFINITY +# undef INFINITY #endif #ifndef INFINITY -#define INFINITY (DBL_MAX + DBL_MAX) +# define INFINITY (DBL_MAX + DBL_MAX) #endif static bool @@ -20,7 +20,7 @@ double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) { if (fabs(a - b) < max_abs_err) { return true; } - rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a); + rel_err = (fabs(b) > fabs(a)) ? fabs((a - b) / b) : fabs((a - b) / a); return (rel_err < max_rel_err); } @@ -41,209 +41,206 @@ TEST_BEGIN(test_ln_gamma_factorial) { /* exp(ln_gamma(x)) == (x-1)! for integer x. */ for (x = 1; x <= 21; x++) { - expect_true(double_eq_rel(exp(ln_gamma(x)), - (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(exp(ln_gamma(x)), (double)factorial(x - 1), + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect factorial result for x=%u", x); } } TEST_END /* Expected ln_gamma([0.0..100.0] increment=0.25). 
*/ -static const double ln_gamma_misc_expected[] = { - INFINITY, - 1.28802252469807743, 0.57236494292470008, 0.20328095143129538, - 0.00000000000000000, -0.09827183642181320, -0.12078223763524518, - -0.08440112102048555, 0.00000000000000000, 0.12487171489239651, - 0.28468287047291918, 0.47521466691493719, 0.69314718055994529, - 0.93580193110872523, 1.20097360234707429, 1.48681557859341718, - 1.79175946922805496, 2.11445692745037128, 2.45373657084244234, - 2.80857141857573644, 3.17805383034794575, 3.56137591038669710, - 3.95781396761871651, 4.36671603662228680, 4.78749174278204581, - 5.21960398699022932, 5.66256205985714178, 6.11591589143154568, - 6.57925121201010121, 7.05218545073853953, 7.53436423675873268, - 8.02545839631598312, 8.52516136106541467, 9.03318691960512332, - 9.54926725730099690, 10.07315123968123949, 10.60460290274525086, - 11.14340011995171231, 11.68933342079726856, 12.24220494005076176, - 12.80182748008146909, 13.36802367147604720, 13.94062521940376342, - 14.51947222506051816, 15.10441257307551943, 15.69530137706046524, - 16.29200047656724237, 16.89437797963419285, 17.50230784587389010, - 18.11566950571089407, 18.73434751193644843, 19.35823122022435427, - 19.98721449566188468, 20.62119544270163018, 21.26007615624470048, - 21.90376249182879320, 22.55216385312342098, 23.20519299513386002, - 23.86276584168908954, 24.52480131594137802, 25.19122118273868338, - 25.86194990184851861, 26.53691449111561340, 27.21604439872720604, - 27.89927138384089389, 28.58652940490193828, 29.27775451504081516, - 29.97288476399884871, 30.67186010608067548, 31.37462231367769050, - 32.08111489594735843, 32.79128302226991565, 33.50507345013689076, - 34.22243445715505317, 34.94331577687681545, 35.66766853819134298, - 36.39544520803305261, 37.12659953718355865, 37.86108650896109395, - 38.59886229060776230, 39.33988418719949465, 40.08411059791735198, - 40.83150097453079752, 41.58201578195490100, 42.33561646075348506, - 43.09226539146988699, 43.85192586067515208, 
44.61456202863158893, - 45.38013889847690052, 46.14862228684032885, 46.91997879580877395, - 47.69417578616628361, 48.47118135183522014, 49.25096429545256882, - 50.03349410501914463, 50.81874093156324790, 51.60667556776436982, - 52.39726942748592364, 53.19049452616926743, 53.98632346204390586, - 54.78472939811231157, 55.58568604486942633, 56.38916764371992940, - 57.19514895105859864, 58.00360522298051080, 58.81451220059079787, - 59.62784609588432261, 60.44358357816834371, 61.26170176100199427, - 62.08217818962842927, 62.90499082887649962, 63.73011805151035958, - 64.55753862700632340, 65.38723171073768015, 66.21917683354901385, - 67.05335389170279825, 67.88974313718154008, 68.72832516833013017, - 69.56908092082363737, 70.41199165894616385, 71.25703896716800045, - 72.10420474200799390, 72.95347118416940191, 73.80482079093779646, - 74.65823634883015814, 75.51370092648485866, 76.37119786778275454, - 77.23071078519033961, 78.09222355331530707, 78.95572030266725960, - 79.82118541361435859, 80.68860351052903468, 81.55795945611502873, - 82.42923834590904164, 83.30242550295004378, 84.17750647261028973, - 85.05446701758152983, 85.93329311301090456, 86.81397094178107920, - 87.69648688992882057, 88.58082754219766741, 89.46697967771913795, - 90.35493026581838194, 91.24466646193963015, 92.13617560368709292, - 93.02944520697742803, 93.92446296229978486, 94.82121673107967297, - 95.71969454214321615, 96.61988458827809723, 97.52177522288820910, - 98.42535495673848800, 99.33061245478741341, 100.23753653310367895, - 101.14611615586458981, 102.05634043243354370, 102.96819861451382394, - 103.88168009337621811, 104.79677439715833032, 105.71347118823287303, - 106.63176026064346047, 107.55163153760463501, 108.47307506906540198, - 109.39608102933323153, 110.32063971475740516, 111.24674154146920557, - 112.17437704317786995, 113.10353686902013237, 114.03421178146170689, - 114.96639265424990128, 115.90007047041454769, 116.83523632031698014, - 117.77188139974506953, 118.70999700805310795, 
119.64957454634490830, - 120.59060551569974962, 121.53308151543865279, 122.47699424143097247, - 123.42233548443955726, 124.36909712850338394, 125.31727114935689826, - 126.26684961288492559, 127.21782467361175861, 128.17018857322420899, - 129.12393363912724453, 130.07905228303084755, 131.03553699956862033, - 131.99338036494577864, 132.95257503561629164, 133.91311374698926784, - 134.87498931216194364, 135.83819462068046846, 136.80272263732638294, - 137.76856640092901785, 138.73571902320256299, 139.70417368760718091, - 140.67392364823425055, 141.64496222871400732, 142.61728282114600574, - 143.59087888505104047, 144.56574394634486680, 145.54187159633210058, - 146.51925549072063859, 147.49788934865566148, 148.47776695177302031, - 149.45888214327129617, 150.44122882700193600, 151.42480096657754984, - 152.40959258449737490, 153.39559776128982094, 154.38281063467164245, - 155.37122539872302696, 156.36083630307879844, 157.35163765213474107, - 158.34362380426921391, 159.33678917107920370, 160.33112821663092973, - 161.32663545672428995, 162.32330545817117695, 163.32113283808695314, - 164.32011226319519892, 165.32023844914485267, 166.32150615984036790, - 167.32391020678358018, 168.32744544842768164, 169.33210678954270634, - 170.33788918059275375, 171.34478761712384198, 172.35279713916281707, - 173.36191283062726143, 174.37212981874515094, 175.38344327348534080, - 176.39584840699734514, 177.40934047306160437, 178.42391476654847793, - 179.43956662288721304, 180.45629141754378111, 181.47408456550741107, - 182.49294152078630304, 183.51285777591152737, 184.53382886144947861, - 185.55585034552262869, 186.57891783333786861, 187.60302696672312095, - 188.62817342367162610, 189.65435291789341932, 190.68156119837468054, - 191.70979404894376330, 192.73904728784492590, 193.76931676731820176, - 194.80059837318714244, 195.83288802445184729, 196.86618167288995096, - 197.90047530266301123, 198.93576492992946214, 199.97204660246373464, - 201.00931639928148797, 202.04757043027063901, 
203.08680483582807597, - 204.12701578650228385, 205.16819948264117102, 206.21035215404597807, - 207.25347005962987623, 208.29754948708190909, 209.34258675253678916, - 210.38857820024875878, 211.43552020227099320, 212.48340915813977858, - 213.53224149456323744, 214.58201366511514152, 215.63272214993284592, - 216.68436345542014010, 217.73693411395422004, 218.79043068359703739, - 219.84484974781133815, 220.90018791517996988, 221.95644181913033322, - 223.01360811766215875, 224.07168349307951871, 225.13066465172661879, - 226.19054832372759734, 227.25133126272962159, 228.31301024565024704, - 229.37558207242807384, 230.43904356577689896, 231.50339157094342113, - 232.56862295546847008, 233.63473460895144740, 234.70172344281823484, - 235.76958639009222907, 236.83832040516844586, 237.90792246359117712, - 238.97838956183431947, 240.04971871708477238, 241.12190696702904802, - 242.19495136964280846, 243.26884900298270509, 244.34359696498191283, - 245.41919237324782443, 246.49563236486270057, 247.57291409618682110, - 248.65103474266476269, 249.72999149863338175, 250.80978157713354904, - 251.89040220972316320, 252.97185064629374551, 254.05412415488834199, - 255.13722002152300661, 256.22113555000953511, 257.30586806178126835, - 258.39141489572085675, 259.47777340799029844, 260.56494097186322279, - 261.65291497755913497, 262.74169283208021852, 263.83127195904967266, - 264.92164979855277807, 266.01282380697938379, 267.10479145686849733, - 268.19755023675537586, 269.29109765101975427, 270.38543121973674488, - 271.48054847852881721, 272.57644697842033565, 273.67312428569374561, - 274.77057798174683967, 275.86880566295326389, 276.96780494052313770, - 278.06757344036617496, 279.16810880295668085, 280.26940868320008349, - 281.37147075030043197, 282.47429268763045229, 283.57787219260217171, - 284.68220697654078322, 285.78729476455760050, 286.89313329542699194, - 287.99972032146268930, 289.10705360839756395, 290.21513093526289140, - 291.32395009427028754, 292.43350889069523646, 
293.54380514276073200, - 294.65483668152336350, 295.76660135076059532, 296.87909700685889902, - 297.99232151870342022, 299.10627276756946458, 300.22094864701409733, - 301.33634706277030091, 302.45246593264130297, 303.56930318639643929, - 304.68685676566872189, 305.80512462385280514, 306.92410472600477078, - 308.04379504874236773, 309.16419358014690033, 310.28529831966631036, - 311.40710727801865687, 312.52961847709792664, 313.65282994987899201, - 314.77673974032603610, 315.90134590329950015, 317.02664650446632777, - 318.15263962020929966, 319.27932333753892635, 320.40669575400545455, - 321.53475497761127144, 322.66349912672620803, 323.79292633000159185, - 324.92303472628691452, 326.05382246454587403, 327.18528770377525916, - 328.31742861292224234, 329.45024337080525356, 330.58373016603343331, - 331.71788719692847280, 332.85271267144611329, 333.98820480709991898, - 335.12436183088397001, 336.26118197919845443, 337.39866349777429377, - 338.53680464159958774, 339.67560367484657036, 340.81505887079896411, - 341.95516851178109619, 343.09593088908627578, 344.23734430290727460, - 345.37940706226686416, 346.52211748494903532, 347.66547389743118401, - 348.80947463481720661, 349.95411804077025408, 351.09940246744753267, - 352.24532627543504759, 353.39188783368263103, 354.53908551944078908, - 355.68691771819692349, 356.83538282361303118, 357.98447923746385868, - 359.13420536957539753 -}; +static const double ln_gamma_misc_expected[] = {INFINITY, 1.28802252469807743, + 0.57236494292470008, 0.20328095143129538, 0.00000000000000000, + -0.09827183642181320, -0.12078223763524518, -0.08440112102048555, + 0.00000000000000000, 0.12487171489239651, 0.28468287047291918, + 0.47521466691493719, 0.69314718055994529, 0.93580193110872523, + 1.20097360234707429, 1.48681557859341718, 1.79175946922805496, + 2.11445692745037128, 2.45373657084244234, 2.80857141857573644, + 3.17805383034794575, 3.56137591038669710, 3.95781396761871651, + 4.36671603662228680, 4.78749174278204581, 
5.21960398699022932, + 5.66256205985714178, 6.11591589143154568, 6.57925121201010121, + 7.05218545073853953, 7.53436423675873268, 8.02545839631598312, + 8.52516136106541467, 9.03318691960512332, 9.54926725730099690, + 10.07315123968123949, 10.60460290274525086, 11.14340011995171231, + 11.68933342079726856, 12.24220494005076176, 12.80182748008146909, + 13.36802367147604720, 13.94062521940376342, 14.51947222506051816, + 15.10441257307551943, 15.69530137706046524, 16.29200047656724237, + 16.89437797963419285, 17.50230784587389010, 18.11566950571089407, + 18.73434751193644843, 19.35823122022435427, 19.98721449566188468, + 20.62119544270163018, 21.26007615624470048, 21.90376249182879320, + 22.55216385312342098, 23.20519299513386002, 23.86276584168908954, + 24.52480131594137802, 25.19122118273868338, 25.86194990184851861, + 26.53691449111561340, 27.21604439872720604, 27.89927138384089389, + 28.58652940490193828, 29.27775451504081516, 29.97288476399884871, + 30.67186010608067548, 31.37462231367769050, 32.08111489594735843, + 32.79128302226991565, 33.50507345013689076, 34.22243445715505317, + 34.94331577687681545, 35.66766853819134298, 36.39544520803305261, + 37.12659953718355865, 37.86108650896109395, 38.59886229060776230, + 39.33988418719949465, 40.08411059791735198, 40.83150097453079752, + 41.58201578195490100, 42.33561646075348506, 43.09226539146988699, + 43.85192586067515208, 44.61456202863158893, 45.38013889847690052, + 46.14862228684032885, 46.91997879580877395, 47.69417578616628361, + 48.47118135183522014, 49.25096429545256882, 50.03349410501914463, + 50.81874093156324790, 51.60667556776436982, 52.39726942748592364, + 53.19049452616926743, 53.98632346204390586, 54.78472939811231157, + 55.58568604486942633, 56.38916764371992940, 57.19514895105859864, + 58.00360522298051080, 58.81451220059079787, 59.62784609588432261, + 60.44358357816834371, 61.26170176100199427, 62.08217818962842927, + 62.90499082887649962, 63.73011805151035958, 64.55753862700632340, + 
65.38723171073768015, 66.21917683354901385, 67.05335389170279825, + 67.88974313718154008, 68.72832516833013017, 69.56908092082363737, + 70.41199165894616385, 71.25703896716800045, 72.10420474200799390, + 72.95347118416940191, 73.80482079093779646, 74.65823634883015814, + 75.51370092648485866, 76.37119786778275454, 77.23071078519033961, + 78.09222355331530707, 78.95572030266725960, 79.82118541361435859, + 80.68860351052903468, 81.55795945611502873, 82.42923834590904164, + 83.30242550295004378, 84.17750647261028973, 85.05446701758152983, + 85.93329311301090456, 86.81397094178107920, 87.69648688992882057, + 88.58082754219766741, 89.46697967771913795, 90.35493026581838194, + 91.24466646193963015, 92.13617560368709292, 93.02944520697742803, + 93.92446296229978486, 94.82121673107967297, 95.71969454214321615, + 96.61988458827809723, 97.52177522288820910, 98.42535495673848800, + 99.33061245478741341, 100.23753653310367895, 101.14611615586458981, + 102.05634043243354370, 102.96819861451382394, 103.88168009337621811, + 104.79677439715833032, 105.71347118823287303, 106.63176026064346047, + 107.55163153760463501, 108.47307506906540198, 109.39608102933323153, + 110.32063971475740516, 111.24674154146920557, 112.17437704317786995, + 113.10353686902013237, 114.03421178146170689, 114.96639265424990128, + 115.90007047041454769, 116.83523632031698014, 117.77188139974506953, + 118.70999700805310795, 119.64957454634490830, 120.59060551569974962, + 121.53308151543865279, 122.47699424143097247, 123.42233548443955726, + 124.36909712850338394, 125.31727114935689826, 126.26684961288492559, + 127.21782467361175861, 128.17018857322420899, 129.12393363912724453, + 130.07905228303084755, 131.03553699956862033, 131.99338036494577864, + 132.95257503561629164, 133.91311374698926784, 134.87498931216194364, + 135.83819462068046846, 136.80272263732638294, 137.76856640092901785, + 138.73571902320256299, 139.70417368760718091, 140.67392364823425055, + 141.64496222871400732, 142.61728282114600574, 
143.59087888505104047, + 144.56574394634486680, 145.54187159633210058, 146.51925549072063859, + 147.49788934865566148, 148.47776695177302031, 149.45888214327129617, + 150.44122882700193600, 151.42480096657754984, 152.40959258449737490, + 153.39559776128982094, 154.38281063467164245, 155.37122539872302696, + 156.36083630307879844, 157.35163765213474107, 158.34362380426921391, + 159.33678917107920370, 160.33112821663092973, 161.32663545672428995, + 162.32330545817117695, 163.32113283808695314, 164.32011226319519892, + 165.32023844914485267, 166.32150615984036790, 167.32391020678358018, + 168.32744544842768164, 169.33210678954270634, 170.33788918059275375, + 171.34478761712384198, 172.35279713916281707, 173.36191283062726143, + 174.37212981874515094, 175.38344327348534080, 176.39584840699734514, + 177.40934047306160437, 178.42391476654847793, 179.43956662288721304, + 180.45629141754378111, 181.47408456550741107, 182.49294152078630304, + 183.51285777591152737, 184.53382886144947861, 185.55585034552262869, + 186.57891783333786861, 187.60302696672312095, 188.62817342367162610, + 189.65435291789341932, 190.68156119837468054, 191.70979404894376330, + 192.73904728784492590, 193.76931676731820176, 194.80059837318714244, + 195.83288802445184729, 196.86618167288995096, 197.90047530266301123, + 198.93576492992946214, 199.97204660246373464, 201.00931639928148797, + 202.04757043027063901, 203.08680483582807597, 204.12701578650228385, + 205.16819948264117102, 206.21035215404597807, 207.25347005962987623, + 208.29754948708190909, 209.34258675253678916, 210.38857820024875878, + 211.43552020227099320, 212.48340915813977858, 213.53224149456323744, + 214.58201366511514152, 215.63272214993284592, 216.68436345542014010, + 217.73693411395422004, 218.79043068359703739, 219.84484974781133815, + 220.90018791517996988, 221.95644181913033322, 223.01360811766215875, + 224.07168349307951871, 225.13066465172661879, 226.19054832372759734, + 227.25133126272962159, 228.31301024565024704, 
229.37558207242807384, + 230.43904356577689896, 231.50339157094342113, 232.56862295546847008, + 233.63473460895144740, 234.70172344281823484, 235.76958639009222907, + 236.83832040516844586, 237.90792246359117712, 238.97838956183431947, + 240.04971871708477238, 241.12190696702904802, 242.19495136964280846, + 243.26884900298270509, 244.34359696498191283, 245.41919237324782443, + 246.49563236486270057, 247.57291409618682110, 248.65103474266476269, + 249.72999149863338175, 250.80978157713354904, 251.89040220972316320, + 252.97185064629374551, 254.05412415488834199, 255.13722002152300661, + 256.22113555000953511, 257.30586806178126835, 258.39141489572085675, + 259.47777340799029844, 260.56494097186322279, 261.65291497755913497, + 262.74169283208021852, 263.83127195904967266, 264.92164979855277807, + 266.01282380697938379, 267.10479145686849733, 268.19755023675537586, + 269.29109765101975427, 270.38543121973674488, 271.48054847852881721, + 272.57644697842033565, 273.67312428569374561, 274.77057798174683967, + 275.86880566295326389, 276.96780494052313770, 278.06757344036617496, + 279.16810880295668085, 280.26940868320008349, 281.37147075030043197, + 282.47429268763045229, 283.57787219260217171, 284.68220697654078322, + 285.78729476455760050, 286.89313329542699194, 287.99972032146268930, + 289.10705360839756395, 290.21513093526289140, 291.32395009427028754, + 292.43350889069523646, 293.54380514276073200, 294.65483668152336350, + 295.76660135076059532, 296.87909700685889902, 297.99232151870342022, + 299.10627276756946458, 300.22094864701409733, 301.33634706277030091, + 302.45246593264130297, 303.56930318639643929, 304.68685676566872189, + 305.80512462385280514, 306.92410472600477078, 308.04379504874236773, + 309.16419358014690033, 310.28529831966631036, 311.40710727801865687, + 312.52961847709792664, 313.65282994987899201, 314.77673974032603610, + 315.90134590329950015, 317.02664650446632777, 318.15263962020929966, + 319.27932333753892635, 320.40669575400545455, 
321.53475497761127144, + 322.66349912672620803, 323.79292633000159185, 324.92303472628691452, + 326.05382246454587403, 327.18528770377525916, 328.31742861292224234, + 329.45024337080525356, 330.58373016603343331, 331.71788719692847280, + 332.85271267144611329, 333.98820480709991898, 335.12436183088397001, + 336.26118197919845443, 337.39866349777429377, 338.53680464159958774, + 339.67560367484657036, 340.81505887079896411, 341.95516851178109619, + 343.09593088908627578, 344.23734430290727460, 345.37940706226686416, + 346.52211748494903532, 347.66547389743118401, 348.80947463481720661, + 349.95411804077025408, 351.09940246744753267, 352.24532627543504759, + 353.39188783368263103, 354.53908551944078908, 355.68691771819692349, + 356.83538282361303118, 357.98447923746385868, 359.13420536957539753}; TEST_BEGIN(test_ln_gamma_misc) { unsigned i; - for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { + for (i = 1; i < sizeof(ln_gamma_misc_expected) / sizeof(double); i++) { double x = (double)i * 0.25; - expect_true(double_eq_rel(ln_gamma(x), - ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(ln_gamma(x), ln_gamma_misc_expected[i], + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect ln_gamma result for i=%u", i); } } TEST_END /* Expected pt_norm([0.01..0.99] increment=0.01). 
*/ -static const double pt_norm_expected[] = { - -INFINITY, - -2.32634787404084076, -2.05374891063182252, -1.88079360815125085, - -1.75068607125216946, -1.64485362695147264, -1.55477359459685305, - -1.47579102817917063, -1.40507156030963221, -1.34075503369021654, - -1.28155156554460081, -1.22652812003661049, -1.17498679206608991, - -1.12639112903880045, -1.08031934081495606, -1.03643338949378938, - -0.99445788320975281, -0.95416525314619416, -0.91536508784281390, - -0.87789629505122846, -0.84162123357291418, -0.80642124701824025, - -0.77219321418868492, -0.73884684918521371, -0.70630256284008752, - -0.67448975019608171, -0.64334540539291685, -0.61281299101662701, - -0.58284150727121620, -0.55338471955567281, -0.52440051270804067, - -0.49585034734745320, -0.46769879911450812, -0.43991316567323380, - -0.41246312944140462, -0.38532046640756751, -0.35845879325119373, - -0.33185334643681652, -0.30548078809939738, -0.27931903444745404, - -0.25334710313579978, -0.22754497664114931, -0.20189347914185077, - -0.17637416478086135, -0.15096921549677725, -0.12566134685507399, - -0.10043372051146975, -0.07526986209982976, -0.05015358346473352, - -0.02506890825871106, 0.00000000000000000, 0.02506890825871106, - 0.05015358346473366, 0.07526986209982990, 0.10043372051146990, - 0.12566134685507413, 0.15096921549677739, 0.17637416478086146, - 0.20189347914185105, 0.22754497664114931, 0.25334710313579978, - 0.27931903444745404, 0.30548078809939738, 0.33185334643681652, - 0.35845879325119373, 0.38532046640756762, 0.41246312944140484, - 0.43991316567323391, 0.46769879911450835, 0.49585034734745348, - 0.52440051270804111, 0.55338471955567303, 0.58284150727121620, - 0.61281299101662701, 0.64334540539291685, 0.67448975019608171, - 0.70630256284008752, 0.73884684918521371, 0.77219321418868492, - 0.80642124701824036, 0.84162123357291441, 0.87789629505122879, - 0.91536508784281423, 0.95416525314619460, 0.99445788320975348, - 1.03643338949378938, 1.08031934081495606, 1.12639112903880045, - 
1.17498679206608991, 1.22652812003661049, 1.28155156554460081, - 1.34075503369021654, 1.40507156030963265, 1.47579102817917085, - 1.55477359459685394, 1.64485362695147308, 1.75068607125217102, - 1.88079360815125041, 2.05374891063182208, 2.32634787404084076 -}; +static const double pt_norm_expected[] = {-INFINITY, -2.32634787404084076, + -2.05374891063182252, -1.88079360815125085, -1.75068607125216946, + -1.64485362695147264, -1.55477359459685305, -1.47579102817917063, + -1.40507156030963221, -1.34075503369021654, -1.28155156554460081, + -1.22652812003661049, -1.17498679206608991, -1.12639112903880045, + -1.08031934081495606, -1.03643338949378938, -0.99445788320975281, + -0.95416525314619416, -0.91536508784281390, -0.87789629505122846, + -0.84162123357291418, -0.80642124701824025, -0.77219321418868492, + -0.73884684918521371, -0.70630256284008752, -0.67448975019608171, + -0.64334540539291685, -0.61281299101662701, -0.58284150727121620, + -0.55338471955567281, -0.52440051270804067, -0.49585034734745320, + -0.46769879911450812, -0.43991316567323380, -0.41246312944140462, + -0.38532046640756751, -0.35845879325119373, -0.33185334643681652, + -0.30548078809939738, -0.27931903444745404, -0.25334710313579978, + -0.22754497664114931, -0.20189347914185077, -0.17637416478086135, + -0.15096921549677725, -0.12566134685507399, -0.10043372051146975, + -0.07526986209982976, -0.05015358346473352, -0.02506890825871106, + 0.00000000000000000, 0.02506890825871106, 0.05015358346473366, + 0.07526986209982990, 0.10043372051146990, 0.12566134685507413, + 0.15096921549677739, 0.17637416478086146, 0.20189347914185105, + 0.22754497664114931, 0.25334710313579978, 0.27931903444745404, + 0.30548078809939738, 0.33185334643681652, 0.35845879325119373, + 0.38532046640756762, 0.41246312944140484, 0.43991316567323391, + 0.46769879911450835, 0.49585034734745348, 0.52440051270804111, + 0.55338471955567303, 0.58284150727121620, 0.61281299101662701, + 0.64334540539291685, 0.67448975019608171, 
0.70630256284008752, + 0.73884684918521371, 0.77219321418868492, 0.80642124701824036, + 0.84162123357291441, 0.87789629505122879, 0.91536508784281423, + 0.95416525314619460, 0.99445788320975348, 1.03643338949378938, + 1.08031934081495606, 1.12639112903880045, 1.17498679206608991, + 1.22652812003661049, 1.28155156554460081, 1.34075503369021654, + 1.40507156030963265, 1.47579102817917085, 1.55477359459685394, + 1.64485362695147308, 1.75068607125217102, 1.88079360815125041, + 2.05374891063182208, 2.32634787404084076}; TEST_BEGIN(test_pt_norm) { unsigned i; - for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { + for (i = 1; i < sizeof(pt_norm_expected) / sizeof(double); i++) { double p = (double)i * 0.01; expect_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], - MAX_REL_ERR, MAX_ABS_ERR), + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_norm result for i=%u", i); } } @@ -254,49 +251,49 @@ TEST_END * df={0.1, 1.1, 10.1, 100.1, 1000.1}). */ static const double pt_chi2_df[] = {0.1, 1.1, 10.1, 100.1, 1000.1}; -static const double pt_chi2_expected[] = { - 1.168926411457320e-40, 1.347680397072034e-22, 3.886980416666260e-17, - 8.245951724356564e-14, 2.068936347497604e-11, 1.562561743309233e-09, - 5.459543043426564e-08, 1.114775688149252e-06, 1.532101202364371e-05, - 1.553884683726585e-04, 1.239396954915939e-03, 8.153872320255721e-03, - 4.631183739647523e-02, 2.473187311701327e-01, 2.175254800183617e+00, +static const double pt_chi2_expected[] = {1.168926411457320e-40, + 1.347680397072034e-22, 3.886980416666260e-17, 8.245951724356564e-14, + 2.068936347497604e-11, 1.562561743309233e-09, 5.459543043426564e-08, + 1.114775688149252e-06, 1.532101202364371e-05, 1.553884683726585e-04, + 1.239396954915939e-03, 8.153872320255721e-03, 4.631183739647523e-02, + 2.473187311701327e-01, 2.175254800183617e+00, - 0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113, - 0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931, - 0.3939246282787986497, 
0.5420727552260817816, 0.7267265822221973259, - 0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304, - 2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839, + 0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113, + 0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931, + 0.3939246282787986497, 0.5420727552260817816, 0.7267265822221973259, + 0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304, + 2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839, - 2.606673548632508, 4.602913725294877, 5.646152813924212, - 6.488971315540869, 7.249823275816285, 7.977314231410841, - 8.700354939944047, 9.441728024225892, 10.224338321374127, - 11.076435368801061, 12.039320937038386, 13.183878752697167, - 14.657791935084575, 16.885728216339373, 23.361991680031817, + 2.606673548632508, 4.602913725294877, 5.646152813924212, 6.488971315540869, + 7.249823275816285, 7.977314231410841, 8.700354939944047, 9.441728024225892, + 10.224338321374127, 11.076435368801061, 12.039320937038386, + 13.183878752697167, 14.657791935084575, 16.885728216339373, + 23.361991680031817, - 70.14844087392152, 80.92379498849355, 85.53325420085891, - 88.94433120715347, 91.83732712857017, 94.46719943606301, - 96.96896479994635, 99.43412843510363, 101.94074719829733, - 104.57228644307247, 107.43900093448734, 110.71844673417287, - 114.76616819871325, 120.57422505959563, 135.92318818757556, + 70.14844087392152, 80.92379498849355, 85.53325420085891, 88.94433120715347, + 91.83732712857017, 94.46719943606301, 96.96896479994635, 99.43412843510363, + 101.94074719829733, 104.57228644307247, 107.43900093448734, + 110.71844673417287, 114.76616819871325, 120.57422505959563, + 135.92318818757556, - 899.0072447849649, 937.9271278858220, 953.8117189560207, - 965.3079371501154, 974.8974061207954, 983.4936235182347, - 991.5691170518946, 999.4334123954690, 1007.3391826856553, - 1015.5445154999951, 1024.3777075619569, 1034.3538789836223, - 
1046.4872561869577, 1063.5717461999654, 1107.0741966053859 -}; + 899.0072447849649, 937.9271278858220, 953.8117189560207, 965.3079371501154, + 974.8974061207954, 983.4936235182347, 991.5691170518946, 999.4334123954690, + 1007.3391826856553, 1015.5445154999951, 1024.3777075619569, + 1034.3538789836223, 1046.4872561869577, 1063.5717461999654, + 1107.0741966053859}; TEST_BEGIN(test_pt_chi2) { unsigned i, j; unsigned e = 0; - for (i = 0; i < sizeof(pt_chi2_df)/sizeof(double); i++) { + for (i = 0; i < sizeof(pt_chi2_df) / sizeof(double); i++) { double df = pt_chi2_df[i]; double ln_gamma_df = ln_gamma(df * 0.5); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - expect_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), - pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(pt_chi2(p, df, ln_gamma_df), + pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_chi2 result for i=%u, j=%u", i, j); e++; } @@ -309,56 +306,56 @@ TEST_END * shape=[0.5..3.0] increment=0.5). 
*/ static const double pt_gamma_shape[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}; -static const double pt_gamma_expected[] = { - 7.854392895485103e-05, 5.043466107888016e-03, 1.788288957794883e-02, - 3.900956150232906e-02, 6.913847560638034e-02, 1.093710833465766e-01, - 1.613412523825817e-01, 2.274682115597864e-01, 3.114117323127083e-01, - 4.189466220207417e-01, 5.598106789059246e-01, 7.521856146202706e-01, - 1.036125427911119e+00, 1.532450860038180e+00, 3.317448300510606e+00, +static const double pt_gamma_expected[] = {7.854392895485103e-05, + 5.043466107888016e-03, 1.788288957794883e-02, 3.900956150232906e-02, + 6.913847560638034e-02, 1.093710833465766e-01, 1.613412523825817e-01, + 2.274682115597864e-01, 3.114117323127083e-01, 4.189466220207417e-01, + 5.598106789059246e-01, 7.521856146202706e-01, 1.036125427911119e+00, + 1.532450860038180e+00, 3.317448300510606e+00, - 0.01005033585350144, 0.08338160893905107, 0.16251892949777497, - 0.24846135929849966, 0.34249030894677596, 0.44628710262841947, - 0.56211891815354142, 0.69314718055994529, 0.84397007029452920, - 1.02165124753198167, 1.23787435600161766, 1.51412773262977574, - 1.89711998488588196, 2.52572864430825783, 4.60517018598809091, + 0.01005033585350144, 0.08338160893905107, 0.16251892949777497, + 0.24846135929849966, 0.34249030894677596, 0.44628710262841947, + 0.56211891815354142, 0.69314718055994529, 0.84397007029452920, + 1.02165124753198167, 1.23787435600161766, 1.51412773262977574, + 1.89711998488588196, 2.52572864430825783, 4.60517018598809091, - 0.05741590094955853, 0.24747378084860744, 0.39888572212236084, - 0.54394139997444901, 0.69048812513915159, 0.84311389861296104, - 1.00580622221479898, 1.18298694218766931, 1.38038096305861213, - 1.60627736383027453, 1.87396970522337947, 2.20749220408081070, - 2.65852391865854942, 3.37934630984842244, 5.67243336507218476, + 0.05741590094955853, 0.24747378084860744, 0.39888572212236084, + 0.54394139997444901, 0.69048812513915159, 0.84311389861296104, + 
1.00580622221479898, 1.18298694218766931, 1.38038096305861213, + 1.60627736383027453, 1.87396970522337947, 2.20749220408081070, + 2.65852391865854942, 3.37934630984842244, 5.67243336507218476, - 0.1485547402532659, 0.4657458011640391, 0.6832386130709406, - 0.8794297834672100, 1.0700752852474524, 1.2629614217350744, - 1.4638400448580779, 1.6783469900166610, 1.9132338090606940, - 2.1778589228618777, 2.4868823970010991, 2.8664695666264195, - 3.3724415436062114, 4.1682658512758071, 6.6383520679938108, + 0.1485547402532659, 0.4657458011640391, 0.6832386130709406, + 0.8794297834672100, 1.0700752852474524, 1.2629614217350744, + 1.4638400448580779, 1.6783469900166610, 1.9132338090606940, + 2.1778589228618777, 2.4868823970010991, 2.8664695666264195, + 3.3724415436062114, 4.1682658512758071, 6.6383520679938108, - 0.2771490383641385, 0.7195001279643727, 0.9969081732265243, - 1.2383497880608061, 1.4675206597269927, 1.6953064251816552, - 1.9291243435606809, 2.1757300955477641, 2.4428032131216391, - 2.7406534569230616, 3.0851445039665513, 3.5043101122033367, - 4.0575997065264637, 4.9182956424675286, 7.5431362346944937, + 0.2771490383641385, 0.7195001279643727, 0.9969081732265243, + 1.2383497880608061, 1.4675206597269927, 1.6953064251816552, + 1.9291243435606809, 2.1757300955477641, 2.4428032131216391, + 2.7406534569230616, 3.0851445039665513, 3.5043101122033367, + 4.0575997065264637, 4.9182956424675286, 7.5431362346944937, - 0.4360451650782932, 0.9983600902486267, 1.3306365880734528, - 1.6129750834753802, 1.8767241606994294, 2.1357032436097660, - 2.3988853336865565, 2.6740603137235603, 2.9697561737517959, - 3.2971457713883265, 3.6731795898504660, 4.1275751617770631, - 4.7230515633946677, 5.6417477865306020, 8.4059469148854635 -}; + 0.4360451650782932, 0.9983600902486267, 1.3306365880734528, + 1.6129750834753802, 1.8767241606994294, 2.1357032436097660, + 2.3988853336865565, 2.6740603137235603, 2.9697561737517959, + 3.2971457713883265, 3.6731795898504660, 4.1275751617770631, + 
4.7230515633946677, 5.6417477865306020, 8.4059469148854635}; TEST_BEGIN(test_pt_gamma_shape) { unsigned i, j; unsigned e = 0; - for (i = 0; i < sizeof(pt_gamma_shape)/sizeof(double); i++) { + for (i = 0; i < sizeof(pt_gamma_shape) / sizeof(double); i++) { double shape = pt_gamma_shape[i]; double ln_gamma_shape = ln_gamma(shape); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - expect_true(double_eq_rel(pt_gamma(p, shape, 1.0, - ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR, - MAX_ABS_ERR), + expect_true( + double_eq_rel( + pt_gamma(p, shape, 1.0, ln_gamma_shape), + pt_gamma_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_gamma result for i=%u, j=%u", i, j); e++; } @@ -370,21 +367,16 @@ TEST_BEGIN(test_pt_gamma_scale) { double shape = 1.0; double ln_gamma_shape = ln_gamma(shape); - expect_true(double_eq_rel( - pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, - pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, - MAX_ABS_ERR), + expect_true( + double_eq_rel(pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, + pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, + MAX_ABS_ERR), "Scale should be trivially equivalent to external multiplication"); } TEST_END int main(void) { - return test( - test_ln_gamma_factorial, - test_ln_gamma_misc, - test_pt_norm, - test_pt_chi2, - test_pt_gamma_shape, - test_pt_gamma_scale); + return test(test_ln_gamma_factorial, test_ln_gamma_misc, test_pt_norm, + test_pt_chi2, test_pt_gamma_shape, test_pt_gamma_scale); } diff --git a/test/unit/mpsc_queue.c b/test/unit/mpsc_queue.c index 895edf84..d22d5488 100644 --- a/test/unit/mpsc_queue.c +++ b/test/unit/mpsc_queue.c @@ -12,10 +12,10 @@ struct elem_s { }; /* Include both proto and gen to make sure they match up. 
*/ -mpsc_queue_proto(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, - elem_list_t); -mpsc_queue_gen(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, - elem_list_t, link); +mpsc_queue_proto( + static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t); +mpsc_queue_gen( + static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t, link); static void init_elems_simple(elem_t *elems, int nelems, int thread) { @@ -29,8 +29,8 @@ init_elems_simple(elem_t *elems, int nelems, int thread) { static void check_elems_simple(elem_list_t *list, int nelems, int thread) { elem_t *elem; - int next_idx = 0; - ql_foreach(elem, list, link) { + int next_idx = 0; + ql_foreach (elem, list, link) { expect_d_lt(next_idx, nelems, "Too many list items"); expect_d_eq(thread, elem->thread, ""); expect_d_eq(next_idx, elem->idx, "List out of order"); @@ -39,9 +39,9 @@ check_elems_simple(elem_list_t *list, int nelems, int thread) { } TEST_BEGIN(test_simple) { - enum {NELEMS = 10}; - elem_t elems[NELEMS]; - elem_list_t list; + enum { NELEMS = 10 }; + elem_t elems[NELEMS]; + elem_list_t list; elem_mpsc_queue_t queue; /* Pop empty queue onto empty list -> empty list */ @@ -82,7 +82,6 @@ TEST_BEGIN(test_simple) { } elem_mpsc_queue_pop_batch(&queue, &list); check_elems_simple(&list, NELEMS, 0); - } TEST_END @@ -137,7 +136,7 @@ TEST_BEGIN(test_push_single_or_batch) { TEST_END TEST_BEGIN(test_multi_op) { - enum {NELEMS = 20}; + enum { NELEMS = 20 }; elem_t elems[NELEMS]; init_elems_simple(elems, NELEMS, 0); elem_list_t push_list; @@ -176,30 +175,29 @@ TEST_BEGIN(test_multi_op) { elem_mpsc_queue_pop_batch(&queue, &result_list); check_elems_simple(&result_list, NELEMS, 0); - } TEST_END typedef struct pusher_arg_s pusher_arg_t; struct pusher_arg_s { elem_mpsc_queue_t *queue; - int thread; - elem_t *elems; - int nelems; + int thread; + elem_t *elems; + int nelems; }; typedef struct popper_arg_s popper_arg_t; struct popper_arg_s { elem_mpsc_queue_t *queue; - int npushers; - int 
nelems_per_pusher; - int *pusher_counts; + int npushers; + int nelems_per_pusher; + int *pusher_counts; }; static void * thd_pusher(void *void_arg) { pusher_arg_t *arg = (pusher_arg_t *)void_arg; - int next_idx = 0; + int next_idx = 0; while (next_idx < arg->nelems) { /* Push 10 items in batch. */ elem_list_t list; @@ -216,7 +214,6 @@ thd_pusher(void *void_arg) { elem_mpsc_queue_push(arg->queue, &arg->elems[next_idx]); next_idx++; } - } return NULL; } @@ -224,13 +221,13 @@ thd_pusher(void *void_arg) { static void * thd_popper(void *void_arg) { popper_arg_t *arg = (popper_arg_t *)void_arg; - int done_pushers = 0; + int done_pushers = 0; while (done_pushers < arg->npushers) { elem_list_t list; ql_new(&list); elem_mpsc_queue_pop_batch(arg->queue, &list); elem_t *elem; - ql_foreach(elem, &list, link) { + ql_foreach (elem, &list, link) { int thread = elem->thread; int idx = elem->idx; expect_d_eq(arg->pusher_counts[thread], idx, @@ -248,12 +245,12 @@ thd_popper(void *void_arg) { TEST_BEGIN(test_multiple_threads) { enum { NPUSHERS = 4, - NELEMS_PER_PUSHER = 1000*1000, + NELEMS_PER_PUSHER = 1000 * 1000, }; - thd_t pushers[NPUSHERS]; + thd_t pushers[NPUSHERS]; pusher_arg_t pusher_arg[NPUSHERS]; - thd_t popper; + thd_t popper; popper_arg_t popper_arg; elem_mpsc_queue_t queue; @@ -296,9 +293,6 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_simple, - test_push_single_or_batch, - test_multi_op, - test_multiple_threads); + return test_no_reentrancy(test_simple, test_push_single_or_batch, + test_multi_op, test_multiple_threads); } diff --git a/test/unit/mq.c b/test/unit/mq.c index f833f77c..9b3b547a 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -1,22 +1,22 @@ #include "test/jemalloc_test.h" -#define NSENDERS 3 -#define NMSGS 100000 +#define NSENDERS 3 +#define NMSGS 100000 typedef struct mq_msg_s mq_msg_t; struct mq_msg_s { - mq_msg(mq_msg_t) link; + mq_msg(mq_msg_t) link; }; mq_gen(static, mq_, mq_t, mq_msg_t, link) -TEST_BEGIN(test_mq_basic) { - mq_t 
mq; + TEST_BEGIN(test_mq_basic) { + mq_t mq; mq_msg_t msg; expect_false(mq_init(&mq), "Unexpected mq_init() failure"); expect_u_eq(mq_count(&mq), 0, "mq should be empty"); - expect_ptr_null(mq_tryget(&mq), - "mq_tryget() should fail when the queue is empty"); + expect_ptr_null( + mq_tryget(&mq), "mq_tryget() should fail when the queue is empty"); mq_put(&mq, &msg); expect_u_eq(mq_count(&mq), 1, "mq should contain one message"); @@ -31,7 +31,7 @@ TEST_END static void * thd_receiver_start(void *arg) { - mq_t *mq = (mq_t *)arg; + mq_t *mq = (mq_t *)arg; unsigned i; for (i = 0; i < (NSENDERS * NMSGS); i++) { @@ -44,12 +44,12 @@ thd_receiver_start(void *arg) { static void * thd_sender_start(void *arg) { - mq_t *mq = (mq_t *)arg; + mq_t *mq = (mq_t *)arg; unsigned i; for (i = 0; i < NMSGS; i++) { mq_msg_t *msg; - void *p; + void *p; p = mallocx(sizeof(mq_msg_t), 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); msg = (mq_msg_t *)p; @@ -59,9 +59,9 @@ thd_sender_start(void *arg) { } TEST_BEGIN(test_mq_threaded) { - mq_t mq; - thd_t receiver; - thd_t senders[NSENDERS]; + mq_t mq; + thd_t receiver; + thd_t senders[NSENDERS]; unsigned i; expect_false(mq_init(&mq), "Unexpected mq_init() failure"); @@ -82,8 +82,5 @@ TEST_END int main(void) { - return test( - test_mq_basic, - test_mq_threaded); + return test(test_mq_basic, test_mq_threaded); } - diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 4aeebc13..0fe15a90 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define NTHREADS 2 -#define NINCRS 2000000 +#define NTHREADS 2 +#define NINCRS 2000000 TEST_BEGIN(test_mtx_basic) { mtx_t mtx; @@ -14,14 +14,14 @@ TEST_BEGIN(test_mtx_basic) { TEST_END typedef struct { - mtx_t mtx; - unsigned x; + mtx_t mtx; + unsigned x; } thd_start_arg_t; static void * thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; - unsigned i; + unsigned i; for (i = 0; i < NINCRS; i++) { mtx_lock(&arg->mtx); @@ -33,8 +33,8 @@ 
thd_start(void *varg) { TEST_BEGIN(test_mtx_race) { thd_start_arg_t arg; - thd_t thds[NTHREADS]; - unsigned i; + thd_t thds[NTHREADS]; + unsigned i; expect_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); arg.x = 0; @@ -44,14 +44,12 @@ TEST_BEGIN(test_mtx_race) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } - expect_u_eq(arg.x, NTHREADS * NINCRS, - "Race-related counter corruption"); + expect_u_eq( + arg.x, NTHREADS * NINCRS, "Race-related counter corruption"); } TEST_END int main(void) { - return test( - test_mtx_basic, - test_mtx_race); + return test(test_mtx_basic, test_mtx_race); } diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c index 1a0d2885..4724f55b 100644 --- a/test/unit/ncached_max.c +++ b/test/unit/ncached_max.c @@ -2,10 +2,10 @@ #include "test/san.h" const char *malloc_conf = -"tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; + "tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; extern void tcache_bin_info_compute( cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); -extern bool tcache_get_default_ncached_max_set(szind_t ind); +extern bool tcache_get_default_ncached_max_set(szind_t ind); extern const cache_bin_info_t *tcache_get_default_ncached_max(void); static void @@ -13,54 +13,54 @@ check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { size_t mib_get[4], mib_get_len; mib_get_len = sizeof(mib_get) / sizeof(size_t); const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; - size_t ncached_max; - size_t sz = sizeof(size_t); + size_t ncached_max; + size_t sz = sizeof(size_t); expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, "Unexpected mallctlnametomib() failure"); for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { size_t bin_size = sz_index2size(i); - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); + expect_d_eq( + 
mallctlbymib(mib_get, mib_get_len, (void *)&ncached_max, + &sz, (void *)&bin_size, sizeof(size_t)), + 0, "Unexpected mallctlbymib() failure"); expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, "Unexpected ncached_max for bin %d", i); /* Check ncached_max returned under a non-bin size. */ bin_size--; size_t temp_ncached_max = 0; expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&temp_ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), + 0, "Unexpected mallctlbymib() failure"); expect_zu_eq(temp_ncached_max, ncached_max, "Unexpected ncached_max for inaccurate bin size."); } } static void * -ncached_max_check(void* args) { +ncached_max_check(void *args) { cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache != NULL); tcache_slow_t *tcache_slow = tcache->tcache_slow; - tcache_bin_info_compute(tcache_bin_info); - memcpy(tcache_bin_info_backup, tcache_bin_info, - sizeof(tcache_bin_info)); + memcpy( + tcache_bin_info_backup, tcache_bin_info, sizeof(tcache_bin_info)); /* Check ncached_max set by malloc_conf. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - bool first_range = (i >= sz_size2index(256) && - i <= sz_size2index(1024)); - bool second_range = (i == sz_size2index(2048)); - bool third_range = (i == sz_size2index(8192)); + bool first_range = (i >= sz_size2index(256) + && i <= sz_size2index(1024)); + bool second_range = (i == sz_size2index(2048)); + bool third_range = (i == sz_size2index(8192)); cache_bin_sz_t target_ncached_max = 0; if (first_range || second_range || third_range) { - target_ncached_max = first_range ? 1001: - (second_range ? 0: 1); + target_ncached_max = first_range + ? 1001 + : (second_range ? 
0 : 1); expect_true(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); expect_zu_eq(target_ncached_max, @@ -88,13 +88,13 @@ ncached_max_check(void* args) { "Unexpected mallctlnametomib() failure"); /* Test the ncached_max set with tcache on. */ - char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; char *inputp = inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + if (i >= sz_size2index(8) && i <= sz_size2index(128)) { cache_bin_info_init(&tcache_bin_info[i], 1); } if (i == sz_size2index(160)) { @@ -119,16 +119,17 @@ ncached_max_check(void* args) { * the new setting will not be carried on. Instead, the default * settings will be applied. */ - bool e0 = false, e1; + bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_true(e1, "Unexpected previous tcache state"); strcpy(inputs, "0-112:8"); /* Setting returns ENOENT when the tcache is disabled. */ expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), ENOENT, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + ENOENT, "Unexpected mallctlbymib() failure"); /* All ncached_max should return 0 once tcache is disabled. 
*/ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], 0); @@ -137,12 +138,13 @@ ncached_max_check(void* args) { e0 = true; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); memcpy(tcache_bin_info, tcache_bin_info_backup, sizeof(tcache_bin_info_backup)); for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; - i++) { + i++) { cache_bin_info_init(&tcache_bin_info[i], 0); } check_bins_info(tcache_bin_info); @@ -152,22 +154,22 @@ ncached_max_check(void* args) { * resetting tcache_max. The ncached_max changes should stay. */ size_t tcache_max = 1024; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&tcache_max, sizeof(size_t)), + 0, "Unexpected mallctl() failure"); for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], 0); } strcpy(inputs, "2048-6144:123"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); tcache_max = 6144; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&tcache_max, sizeof(size_t)), + 0, "Unexpected mallctl() failure"); memcpy(tcache_bin_info, tcache_bin_info_backup, sizeof(tcache_bin_info_backup)); for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { @@ -182,15 +184,15 @@ ncached_max_check(void* args) { /* Test an empty input, it should do 
nothing. */ strcpy(inputs, ""); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* Test a half-done string, it should return EINVAL and do nothing. */ strcpy(inputs, "4-1024:7|256-1024"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* @@ -199,8 +201,8 @@ ncached_max_check(void* args) { */ strcpy(inputs, "1024-256:7"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* @@ -216,8 +218,8 @@ ncached_max_check(void* args) { long_inputs[200 * 9 + 8] = '\0'; inputp = long_inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); free(long_inputs); @@ -228,17 +230,17 @@ ncached_max_check(void* args) { strcpy(inputs, "k8-1024:77p"); inputp = inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* Test large ncached_max, it should return success but capped. 
*/ strcpy(inputs, "1024-1024:65540"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], - CACHE_BIN_NCACHED_MAX); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); + cache_bin_info_init( + &tcache_bin_info[sz_size2index(1024)], CACHE_BIN_NCACHED_MAX); check_bins_info(tcache_bin_info); return NULL; @@ -262,7 +264,5 @@ TEST_END int main(void) { - return test( - test_ncached_max); + return test(test_ncached_max); } - diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 43fd3954..8c095d09 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define BILLION UINT64_C(1000000000) +#define BILLION UINT64_C(1000000000) TEST_BEGIN(test_nstime_init) { nstime_t nst; @@ -43,24 +43,24 @@ TEST_BEGIN(test_nstime_compare) { nstime_init2(&nstb, 42, 42); expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - expect_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); + expect_d_eq( + nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 42, 44); - expect_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); + expect_d_eq( + nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); nstime_init2(&nstb, 41, BILLION - 1); expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - expect_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); + expect_d_eq( + nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 43, 0); - expect_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); + expect_d_eq( + nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); 
expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); } @@ -73,15 +73,15 @@ TEST_BEGIN(test_nstime_add) { nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 84, 86); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, BILLION - 1); nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 85, BILLION - 2); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -91,14 +91,14 @@ TEST_BEGIN(test_nstime_iadd) { nstime_init2(&nsta, 42, BILLION - 1); nstime_iadd(&nsta, 1); nstime_init2(&nstb, 43, 0); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, 1); nstime_iadd(&nsta, BILLION + 1); nstime_init2(&nstb, 43, 2); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -109,15 +109,15 @@ TEST_BEGIN(test_nstime_subtract) { nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); nstime_init_zero(&nstb); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_init2(&nstb, 41, 44); nstime_subtract(&nsta, &nstb); nstime_init2(&nstb, 0, BILLION - 1); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -125,16 +125,16 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); - nstime_isubtract(&nsta, 42*BILLION + 43); + 
nstime_isubtract(&nsta, 42 * BILLION + 43); nstime_init_zero(&nstb); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); - nstime_isubtract(&nsta, 41*BILLION + 44); + nstime_isubtract(&nsta, 41 * BILLION + 44); nstime_init2(&nstb, 0, BILLION - 1); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -144,14 +144,14 @@ TEST_BEGIN(test_nstime_imultiply) { nstime_init2(&nsta, 42, 43); nstime_imultiply(&nsta, 10); nstime_init2(&nstb, 420, 430); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); nstime_init2(&nsta, 42, 666666666); nstime_imultiply(&nsta, 3); nstime_init2(&nstb, 127, 999999998); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); } TEST_END @@ -162,15 +162,15 @@ TEST_BEGIN(test_nstime_idivide) { nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_idivide(&nsta, 10); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); nstime_init2(&nsta, 42, 666666666); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 3); nstime_idivide(&nsta, 3); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); } TEST_END @@ -180,24 +180,24 @@ TEST_BEGIN(test_nstime_divide) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); - expect_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); + expect_u64_eq( + 
nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_add(&nsta, &nstc); - expect_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); + expect_u64_eq( + nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_subtract(&nsta, &nstc); - expect_u64_eq(nstime_divide(&nsta, &nstb), 9, - "Incorrect division result"); + expect_u64_eq( + nstime_divide(&nsta, &nstb), 9, "Incorrect division result"); } TEST_END @@ -213,8 +213,8 @@ test_nstime_since_once(nstime_t *t) { nstime_copy(&new_t, t); nstime_subtract(&new_t, &old_t); - expect_u64_ge(nstime_ns(&new_t), ns_since, - "Incorrect time since result"); + expect_u64_ge( + nstime_ns(&new_t), ns_since, "Incorrect time since result"); } TEST_BEGIN(test_nstime_ns_since) { @@ -253,19 +253,9 @@ TEST_END int main(void) { - return test( - test_nstime_init, - test_nstime_init2, - test_nstime_copy, - test_nstime_compare, - test_nstime_add, - test_nstime_iadd, - test_nstime_subtract, - test_nstime_isubtract, - test_nstime_imultiply, - test_nstime_idivide, - test_nstime_divide, - test_nstime_ns_since, - test_nstime_ms_since, - test_nstime_monotonic); + return test(test_nstime_init, test_nstime_init2, test_nstime_copy, + test_nstime_compare, test_nstime_add, test_nstime_iadd, + test_nstime_subtract, test_nstime_isubtract, test_nstime_imultiply, + test_nstime_idivide, test_nstime_divide, test_nstime_ns_since, + test_nstime_ms_since, test_nstime_monotonic); } diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index 95ce6537..5d9aae10 100644 --- a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -5,7 +5,7 @@ static void arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, size_t *oldlen, void *newp, size_t newlen) { - 
int err; + int err; char buf[100]; malloc_snprintf(buf, sizeof(buf), mallctl_str, arena); @@ -14,13 +14,13 @@ arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, } TEST_BEGIN(test_oversize_threshold_get_set) { - int err; + int err; size_t old_threshold; size_t new_threshold; size_t threshold_sz = sizeof(old_threshold); unsigned arena; - size_t arena_sz = sizeof(arena); + size_t arena_sz = sizeof(arena); err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); expect_d_eq(0, err, "Arena creation failed"); @@ -38,13 +38,14 @@ TEST_BEGIN(test_oversize_threshold_get_set) { /* Just a read */ arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, &threshold_sz, NULL, 0); - expect_zu_eq(2 * 1024 * 1024, old_threshold, "Should have read old value"); + expect_zu_eq( + 2 * 1024 * 1024, old_threshold, "Should have read old value"); } TEST_END static size_t max_purged = 0; static bool -purge_forced_record_max(extent_hooks_t* hooks, void *addr, size_t sz, +purge_forced_record_max(extent_hooks_t *hooks, void *addr, size_t sz, size_t offset, size_t length, unsigned arena_ind) { if (length > max_purged) { max_purged = length; @@ -73,7 +74,7 @@ TEST_BEGIN(test_oversize_threshold) { int err; unsigned arena; - size_t arena_sz = sizeof(arena); + size_t arena_sz = sizeof(arena); err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); expect_d_eq(0, err, "Arena creation failed"); arena_mallctl("arena.%u.extent_hooks", arena, NULL, NULL, &extent_hooks, @@ -121,8 +122,8 @@ TEST_BEGIN(test_oversize_threshold) { ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); if (!is_background_thread_enabled()) { - expect_zu_ge(max_purged, 2 * 1024 * 1024, - "Expected a 2MB purge"); + expect_zu_ge( + max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); } } TEST_END @@ -130,7 +131,5 @@ TEST_END int main(void) { return test_no_reentrancy( - test_oversize_threshold_get_set, - test_oversize_threshold); + 
test_oversize_threshold_get_set, test_oversize_threshold); } - diff --git a/test/unit/pa.c b/test/unit/pa.c index d44bb95c..8552225f 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -16,8 +16,8 @@ merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } static bool -split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { +split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind) { return !maps_coalesce; } @@ -39,13 +39,13 @@ init_test_extent_hooks(extent_hooks_t *hooks) { typedef struct test_data_s test_data_t; struct test_data_s { - pa_shard_t shard; - pa_central_t central; - base_t *base; - emap_t emap; + pa_shard_t shard; + pa_central_t central; + base_t *base; + emap_t emap; pa_shard_stats_t stats; - malloc_mutex_t stats_mtx; - extent_hooks_t hooks; + malloc_mutex_t stats_mtx; + extent_hooks_t hooks; }; static test_data_t * @@ -66,8 +66,8 @@ init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); - err = pa_central_init(&test_data->central, base, opt_hpa, - &hpa_hooks_default); + err = pa_central_init( + &test_data->central, base, opt_hpa, &hpa_hooks_default); assert_false(err, ""); const size_t pa_oversize_threshold = 8 * 1024 * 1024; @@ -80,7 +80,8 @@ init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { return test_data; } -void destroy_test_data(test_data_t *data) { +void +destroy_test_data(test_data_t *data) { base_delete(TSDN_NULL, data->base); free(data); } @@ -89,28 +90,28 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, /* guarded */ false, 
&deferred_work_generated); assert_ptr_not_null(edata, ""); pa_dalloc(TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); - malloc_mutex_lock(TSDN_NULL, - &test_data->shard.pac.decay_dirty.mtx); + malloc_mutex_lock( + TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); pac_decay_all(TSDN_NULL, &test_data->shard.pac, &test_data->shard.pac.decay_dirty, &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); - malloc_mutex_unlock(TSDN_NULL, - &test_data->shard.pac.decay_dirty.mtx); + malloc_mutex_unlock( + TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); } return NULL; } TEST_BEGIN(test_alloc_free_purge_thds) { test_data_t *test_data = init_test_data(0, 0); - thd_t thds[4]; + thd_t thds[4]; for (int i = 0; i < 4; i++) { thd_create(&thds[i], do_alloc_free_purge, test_data); } @@ -122,6 +123,5 @@ TEST_END int main(void) { - return test( - test_alloc_free_purge_thds); + return test(test_alloc_free_purge_thds); } diff --git a/test/unit/pack.c b/test/unit/pack.c index e6392825..e3024512 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -4,9 +4,9 @@ * Size class that is a divisor of the page size, ideally 4+ regions per run. */ #if LG_PAGE <= 14 -#define SZ (ZU(1) << (LG_PAGE - 2)) +# define SZ (ZU(1) << (LG_PAGE - 2)) #else -#define SZ ZU(4096) +# define SZ ZU(4096) #endif /* @@ -14,11 +14,11 @@ * if mmap()ed memory grows downward, downward growth of mmap()ed memory is * tested. 
*/ -#define NSLABS 8 +#define NSLABS 8 static unsigned binind_compute(void) { - size_t sz; + size_t sz; unsigned nbins, i; sz = sizeof(nbins); @@ -27,16 +27,17 @@ binind_compute(void) { for (i = 0; i < nbins; i++) { size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); size_t size; - expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, - &miblen), 0, "Unexpected mallctlnametomb failure"); + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), + 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)i; sz = sizeof(size); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); if (size == SZ) { return i; } @@ -49,24 +50,24 @@ binind_compute(void) { static size_t nregs_per_run_compute(void) { uint32_t nregs; - size_t sz; + size_t sz; unsigned binind = binind_compute(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); return nregs; } static unsigned arenas_create_mallctl(void) { unsigned arena_ind; - size_t sz; + size_t sz; sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), @@ -78,7 +79,7 @@ arenas_create_mallctl(void) { static void arena_reset_mallctl(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 
0, "Unexpected mallctlnametomib() failure"); @@ -88,23 +89,23 @@ arena_reset_mallctl(unsigned arena_ind) { } TEST_BEGIN(test_pack) { - bool prof_enabled; + bool prof_enabled; size_t sz = sizeof(prof_enabled); if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) { test_skip_if(prof_enabled); } unsigned arena_ind = arenas_create_mallctl(); - size_t nregs_per_run = nregs_per_run_compute(); - size_t nregs = nregs_per_run * NSLABS; + size_t nregs_per_run = nregs_per_run_compute(); + size_t nregs = nregs_per_run * NSLABS; VARIABLE_ARRAY(void *, ptrs, nregs); size_t i, j, offset; /* Fill matrix. */ for (i = offset = 0; i < NSLABS; i++) { for (j = 0; j < nregs_per_run; j++) { - void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + void *p = mallocx( + SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", @@ -119,16 +120,15 @@ TEST_BEGIN(test_pack) { * layout policy. */ offset = 0; - for (i = offset = 0; - i < NSLABS; - i++, offset = (offset + 1) % nregs_per_run) { + for (i = offset = 0; i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { for (j = 0; j < nregs_per_run; j++) { void *p = ptrs[(i * nregs_per_run) + j]; if (offset == j) { continue; } - dallocx(p, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + dallocx( + p, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); } } @@ -137,17 +137,16 @@ TEST_BEGIN(test_pack) { * that the matrix is unmodified. 
*/ offset = 0; - for (i = offset = 0; - i < NSLABS; - i++, offset = (offset + 1) % nregs_per_run) { + for (i = offset = 0; i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { for (j = 0; j < nregs_per_run; j++) { void *p; if (offset == j) { continue; } - p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + p = mallocx( + SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); expect_ptr_eq(p, ptrs[(i * nregs_per_run) + j], "Unexpected refill discrepancy, run=%zu, reg=%zu\n", i, j); @@ -161,6 +160,5 @@ TEST_END int main(void) { - return test( - test_pack); + return test(test_pack); } diff --git a/test/unit/pages.c b/test/unit/pages.c index 8dfd1a72..dbee2f0c 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -2,8 +2,8 @@ TEST_BEGIN(test_pages_huge) { size_t alloc_size; - bool commit; - void *pages, *hugepage; + bool commit; + void *pages, *hugepage; alloc_size = HUGEPAGE * 2 - PAGE; commit = true; @@ -11,11 +11,12 @@ TEST_BEGIN(test_pages_huge) { expect_ptr_not_null(pages, "Unexpected pages_map() error"); if (init_system_thp_mode == thp_mode_default) { - hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, - "Unexpected pages_huge() result"); - expect_false(pages_nohuge(hugepage, HUGEPAGE), - "Unexpected pages_nohuge() result"); + hugepage = (void *)(ALIGNMENT_CEILING( + (uintptr_t)pages, HUGEPAGE)); + expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, + "Unexpected pages_huge() result"); + expect_false(pages_nohuge(hugepage, HUGEPAGE), + "Unexpected pages_nohuge() result"); } pages_unmap(pages, alloc_size); @@ -24,6 +25,5 @@ TEST_END int main(void) { - return test( - test_pages_huge); + return test(test_pages_huge); } diff --git a/test/unit/peak.c b/test/unit/peak.c index 11129785..80eda30d 100644 --- a/test/unit/peak.c +++ b/test/unit/peak.c @@ -4,11 +4,10 @@ TEST_BEGIN(test_peak) { peak_t peak = PEAK_INITIALIZER; - expect_u64_eq(0, 
peak_max(&peak), - "Peak should be zero at initialization"); + expect_u64_eq( + 0, peak_max(&peak), "Peak should be zero at initialization"); peak_update(&peak, 100, 50); - expect_u64_eq(50, peak_max(&peak), - "Missed update"); + expect_u64_eq(50, peak_max(&peak), "Missed update"); peak_update(&peak, 100, 100); expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); peak_update(&peak, 100, 200); @@ -42,6 +41,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_peak); + return test_no_reentrancy(test_peak); } diff --git a/test/unit/ph.c b/test/unit/ph.c index 0339f993..c9e4da9c 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -8,9 +8,9 @@ ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e - uint32_t magic; + uint32_t magic; heap_link_t link; - uint64_t key; + uint64_t key; }; static int @@ -31,7 +31,6 @@ node_cmp(const node_t *a, const node_t *b) { static int node_cmp_magic(const node_t *a, const node_t *b) { - expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); @@ -58,12 +57,12 @@ node_lchild_get(const node_t *node) { static void node_print(const node_t *node, unsigned depth) { unsigned i; - node_t *leftmost_child, *sibling; + node_t *leftmost_child, *sibling; for (i = 0; i < depth; i++) { malloc_printf("\t"); } - malloc_printf("%2"FMTu64"\n", node->key); + malloc_printf("%2" FMTu64 "\n", node->key); leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { @@ -71,8 +70,8 @@ node_print(const node_t *node, unsigned depth) { } node_print(leftmost_child, depth + 1); - for (sibling = node_next_get(leftmost_child); sibling != - NULL; sibling = node_next_get(sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != NULL; + sibling = node_next_get(sibling)) { node_print(sibling, depth + 1); } } @@ -89,7 +88,7 @@ heap_print(const heap_t *heap) { node_print(heap->ph.root, 0); for (auxelm = node_next_get(heap->ph.root); auxelm != 
NULL; - auxelm = node_next_get(auxelm)) { + auxelm = node_next_get(auxelm)) { expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); @@ -102,7 +101,7 @@ label_return: static unsigned node_validate(const node_t *node, const node_t *parent) { unsigned nnodes = 1; - node_t *leftmost_child, *sibling; + node_t *leftmost_child, *sibling; if (parent != NULL) { expect_d_ge(node_cmp_magic(node, parent), 0, @@ -113,12 +112,12 @@ node_validate(const node_t *node, const node_t *parent) { if (leftmost_child == NULL) { return nnodes; } - expect_ptr_eq(node_prev_get(leftmost_child), - (void *)node, "Leftmost child does not link to node"); + expect_ptr_eq(node_prev_get(leftmost_child), (void *)node, + "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); - for (sibling = node_next_get(leftmost_child); sibling != - NULL; sibling = node_next_get(sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != NULL; + sibling = node_next_get(sibling)) { expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); @@ -129,7 +128,7 @@ node_validate(const node_t *node, const node_t *parent) { static unsigned heap_validate(const heap_t *heap) { unsigned nnodes = 0; - node_t *auxelm; + node_t *auxelm; if (heap->ph.root == NULL) { goto label_return; @@ -138,7 +137,7 @@ heap_validate(const heap_t *heap) { nnodes += node_validate(heap->ph.root, NULL); for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; - auxelm = node_next_get(auxelm)) { + auxelm = node_next_get(auxelm)) { expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); @@ -186,10 +185,10 @@ TEST_BEGIN(test_ph_random) { #define NNODES 25 #define NBAGS 250 #define SEED 42 - sfmt_t *sfmt; + sfmt_t *sfmt; uint64_t bag[NNODES]; - heap_t heap; - node_t 
nodes[NNODES]; + heap_t heap; + node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); @@ -216,8 +215,8 @@ TEST_BEGIN(test_ph_random) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ heap_new(&heap); - expect_u_eq(heap_validate(&heap), 0, - "Incorrect node count"); + expect_u_eq( + heap_validate(&heap), 0, "Incorrect node count"); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -237,8 +236,8 @@ TEST_BEGIN(test_ph_random) { "Incorrect node count"); } - expect_false(heap_empty(&heap), - "Heap should not be empty"); + expect_false( + heap_empty(&heap), "Heap should not be empty"); /* Enumerate nodes. */ heap_enumerate_helper_t helper; @@ -247,14 +246,14 @@ TEST_BEGIN(test_ph_random) { expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, "Incorrect bfs queue length initialized"); assert(max_queue_size == BFS_ENUMERATE_MAX); - heap_enumerate_prepare(&heap, &helper, - BFS_ENUMERATE_MAX, max_queue_size); + heap_enumerate_prepare( + &heap, &helper, BFS_ENUMERATE_MAX, max_queue_size); size_t node_count = 0; - while(heap_enumerate_next(&heap, &helper)) { - node_count ++; + while (heap_enumerate_next(&heap, &helper)) { + node_count++; } - expect_lu_eq(node_count, j, - "Unexpected enumeration results."); + expect_lu_eq( + node_count, j, "Unexpected enumeration results."); /* Remove nodes. 
*/ switch (i % 6) { @@ -263,13 +262,13 @@ TEST_BEGIN(test_ph_random) { expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, &nodes[k]); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); } break; case 1: for (k = j; k > 0; k--) { - node_remove(&heap, &nodes[k-1]); + node_remove(&heap, &nodes[k - 1]); expect_u_eq(heap_validate(&heap), k - 1, "Incorrect node count"); } @@ -278,58 +277,62 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = node_remove_first(&heap); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); if (prev != NULL) { - expect_d_ge(node_cmp(node, - prev), 0, + expect_d_ge( + node_cmp(node, prev), 0, "Bad removal order"); } prev = node; } break; - } case 3: { + } + case 3: { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = heap_first(&heap); expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); if (prev != NULL) { - expect_d_ge(node_cmp(node, - prev), 0, + expect_d_ge( + node_cmp(node, prev), 0, "Bad removal order"); } node_remove(&heap, node); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); prev = node; } break; - } case 4: { + } + case 4: { for (k = 0; k < j; k++) { node_remove_any(&heap); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); } break; - } case 5: { + } + case 5: { for (k = 0; k < j; k++) { node_t *node = heap_any(&heap); expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, node); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); 
} break; - } default: + } + default: not_reached(); } - expect_ptr_null(heap_first(&heap), - "Heap should be empty"); - expect_ptr_null(heap_any(&heap), - "Heap should be empty"); + expect_ptr_null( + heap_first(&heap), "Heap should be empty"); + expect_ptr_null( + heap_any(&heap), "Heap should be empty"); expect_true(heap_empty(&heap), "Heap should be empty"); } } @@ -341,7 +344,5 @@ TEST_END int main(void) { - return test( - test_ph_empty, - test_ph_random); + return test(test_ph_empty, test_ph_random); } diff --git a/test/unit/prng.c b/test/unit/prng.c index a6d9b014..20b8470e 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -9,32 +9,31 @@ TEST_BEGIN(test_prng_lg_range_u32) { ra = prng_lg_range_u32(&sa, 32); sa = 42; rb = prng_lg_range_u32(&sa, 32); - expect_u32_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_u32_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_u32(&sb, 32); - expect_u32_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_u32_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u32(&sa, 32); rb = prng_lg_range_u32(&sa, 32); - expect_u32_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_u32(&sa, 32); for (lg_range = 31; lg_range > 0; lg_range--) { sb = 42; rb = prng_lg_range_u32(&sb, lg_range); - expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); + expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, + "High order bits should be 0, lg_range=%u", lg_range); expect_u32_eq(rb, (ra >> (32 - lg_range)), "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); + "lg_range=%u", + lg_range); } - } TEST_END @@ -46,19 +45,18 @@ TEST_BEGIN(test_prng_lg_range_u64) { ra = 
prng_lg_range_u64(&sa, 64); sa = 42; rb = prng_lg_range_u64(&sa, 64); - expect_u64_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_u64_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_u64(&sb, 64); - expect_u64_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_u64_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u64(&sa, 64); rb = prng_lg_range_u64(&sa, 64); - expect_u64_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_u64(&sa, 64); @@ -69,47 +67,48 @@ TEST_BEGIN(test_prng_lg_range_u64) { 0, "High order bits should be 0, lg_range=%u", lg_range); expect_u64_eq(rb, (ra >> (64 - lg_range)), "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); + "lg_range=%u", + lg_range); } } TEST_END TEST_BEGIN(test_prng_lg_range_zu) { - size_t sa, sb; - size_t ra, rb; + size_t sa, sb; + size_t ra, rb; unsigned lg_range; sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); sa = 42; rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_zu_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_zu_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); 
for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; - lg_range--) { + lg_range--) { sb = 42; rb = prng_lg_range_zu(&sb, lg_range); - expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); - expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - - lg_range)), "Expected high order bits of full-width " - "result, lg_range=%u", lg_range); + expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, + "High order bits should be 0, lg_range=%u", lg_range); + expect_zu_eq(rb, + (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)), + "Expected high order bits of full-width " + "result, lg_range=%u", + lg_range); } - } TEST_END @@ -158,13 +157,12 @@ TEST_END TEST_BEGIN(test_prng_range_zu) { size_t range; - const size_t max_range = 10000000; - const size_t range_step = 97; + const size_t max_range = 10000000; + const size_t range_step = 97; const unsigned nreps = 10; - for (range = 2; range < max_range; range += range_step) { - size_t s; + size_t s; unsigned rep; s = range; @@ -179,11 +177,7 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prng_lg_range_u32, - test_prng_lg_range_u64, - test_prng_lg_range_zu, - test_prng_range_u32, - test_prng_range_u64, - test_prng_range_zu); + return test_no_reentrancy(test_prng_lg_range_u32, + test_prng_lg_range_u64, test_prng_lg_range_zu, test_prng_range_u32, + test_prng_range_u64, test_prng_range_zu); } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index ef392acd..940468b9 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -3,10 +3,10 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" -#define NTHREADS 4 -#define NALLOCS_PER_THREAD 50 -#define DUMP_INTERVAL 1 -#define BT_COUNT_CHECK_INTERVAL 5 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 static int prof_dump_open_file_intercept(const char *filename, int mode) { @@ -20,13 +20,13 @@ 
prof_dump_open_file_intercept(const char *filename, int mode) { static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { - return btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration); + return btalloc(1, thd_ind * NALLOCS_PER_THREAD + iteration); } static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; - size_t bt_count_prev, bt_count; + size_t bt_count_prev, bt_count; unsigned i_prev, i; i_prev = 0; @@ -39,10 +39,10 @@ thd_start(void *varg) { 0, "Unexpected error while dumping heap profile"); } - if (i % BT_COUNT_CHECK_INTERVAL == 0 || - i+1 == NALLOCS_PER_THREAD) { + if (i % BT_COUNT_CHECK_INTERVAL == 0 + || i + 1 == NALLOCS_PER_THREAD) { bt_count = prof_bt_count(); - expect_zu_le(bt_count_prev+(i-i_prev), bt_count, + expect_zu_le(bt_count_prev + (i - i_prev), bt_count, "Expected larger backtrace count increase"); i_prev = i; bt_count_prev = bt_count; @@ -53,17 +53,17 @@ thd_start(void *varg) { } TEST_BEGIN(test_idump) { - bool active; - thd_t thds[NTHREADS]; + bool active; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -79,6 +79,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_idump); + return test_no_reentrancy(test_idump); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index af29e7ad..fc8b150b 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -4,37 +4,37 @@ static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { - bool old; + bool old; size_t sz; sz = sizeof(old); expect_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 
0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); - expect_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, - name); + expect_b_eq( + old, expected, "%s():%d: Unexpected %s value", func, line, name); } static void mallctl_bool_set(const char *name, bool old_expected, bool val_new, const char *func, int line) { - bool old; + bool old; size_t sz; sz = sizeof(old); - expect_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, - sizeof(val_new)), 0, - "%s():%d: Unexpected mallctl failure reading/writing %s", func, + expect_d_eq( + mallctl(name, (void *)&old, &sz, (void *)&val_new, sizeof(val_new)), + 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); expect_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, line, name); } static void -mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, - int line) { +mallctl_prof_active_get_impl( + bool prof_active_old_expected, const char *func, int line) { mallctl_bool_get("prof.active", prof_active_old_expected, func, line); } -#define mallctl_prof_active_get(a) \ +#define mallctl_prof_active_get(a) \ mallctl_prof_active_get_impl(a, __func__, __LINE__) static void @@ -43,16 +43,16 @@ mallctl_prof_active_set_impl(bool prof_active_old_expected, mallctl_bool_set("prof.active", prof_active_old_expected, prof_active_new, func, line); } -#define mallctl_prof_active_set(a, b) \ +#define mallctl_prof_active_set(a, b) \ mallctl_prof_active_set_impl(a, b, __func__, __LINE__) static void -mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, - const char *func, int line) { - mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, - func, line); +mallctl_thread_prof_active_get_impl( + bool thread_prof_active_old_expected, const char *func, int line) { + mallctl_bool_get( + "thread.prof.active", thread_prof_active_old_expected, func, line); } -#define mallctl_thread_prof_active_get(a) \ +#define 
mallctl_thread_prof_active_get(a) \ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__) static void @@ -61,24 +61,23 @@ mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, thread_prof_active_new, func, line); } -#define mallctl_thread_prof_active_set(a, b) \ +#define mallctl_thread_prof_active_set(a, b) \ mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__) static void prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { - void *p; + void *p; size_t expected_backtraces = expect_sample ? 1 : 0; - expect_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, - line); + expect_zu_eq( + prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, line); p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_zu_eq(prof_bt_count(), expected_backtraces, "%s():%d: Unexpected backtrace count", func, line); dallocx(p, 0); } -#define prof_sampling_probe(a) \ - prof_sampling_probe_impl(a, __func__, __LINE__) +#define prof_sampling_probe(a) prof_sampling_probe_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_active) { test_skip_if(!config_prof); @@ -114,6 +113,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prof_active); + return test_no_reentrancy(test_prof_active); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 46e45036..4cca9bdb 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -18,16 +18,16 @@ prof_dump_open_file_intercept(const char *filename, int mode) { TEST_BEGIN(test_gdump) { test_skip_if(opt_hpa); - bool active, gdump, gdump_old; - void *p, *q, *r, *s; + bool active, gdump, gdump_old; + void *p, *q, *r, *s; size_t sz; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", 
NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -44,8 +44,8 @@ TEST_BEGIN(test_gdump) { gdump = false; sz = sizeof(gdump_old); expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, - "Unexpected mallctl failure while disabling prof.gdump"); + (void *)&gdump, sizeof(gdump)), + 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); @@ -55,8 +55,8 @@ TEST_BEGIN(test_gdump) { gdump = true; sz = sizeof(gdump_old); expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, - "Unexpected mallctl failure while enabling prof.gdump"); + (void *)&gdump, sizeof(gdump)), + 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); @@ -72,6 +72,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_gdump); + return test_no_reentrancy(test_gdump); } diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index fd2871e5..1d58469c 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -14,10 +14,10 @@ bool mock_dump_hook_called = false; bool mock_prof_sample_hook_called = false; bool mock_prof_sample_free_hook_called = false; -void *sampled_ptr = NULL; +void *sampled_ptr = NULL; size_t sampled_ptr_sz = 0; size_t sampled_ptr_usz = 0; -void *free_sampled_ptr = NULL; +void *free_sampled_ptr = NULL; size_t free_sampled_ptr_sz = 0; void @@ -49,7 +49,6 @@ mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { (*len)++; } - mock_bt_hook_called = true; } @@ -61,14 +60,15 @@ mock_dump_hook(const char *filename) { } void -mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len, size_t usz) { +mock_prof_sample_hook( + const void *ptr, size_t sz, void 
**vec, unsigned len, size_t usz) { mock_prof_sample_hook_called = true; sampled_ptr = (void *)ptr; sampled_ptr_sz = sz; sampled_ptr_usz = usz; for (unsigned i = 0; i < len; i++) { - expect_ptr_not_null((void **)vec[i], - "Backtrace should not contain NULL"); + expect_ptr_not_null( + (void **)vec[i], "Backtrace should not contain NULL"); } } @@ -80,7 +80,6 @@ mock_prof_sample_free_hook(const void *ptr, size_t sz) { } TEST_BEGIN(test_prof_backtrace_hook_replace) { - test_skip_if(!config_prof); mock_bt_hook_called = false; @@ -91,15 +90,16 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { expect_false(mock_bt_hook_called, "Called mock hook before it's set"); prof_backtrace_hook_t null_hook = NULL; - expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - NULL, 0, (void *)&null_hook, sizeof(null_hook)), - EINVAL, "Incorrectly allowed NULL backtrace hook"); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", NULL, 0, + (void *)&null_hook, sizeof(null_hook)), + EINVAL, "Incorrectly allowed NULL backtrace hook"); size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + (void *)&default_bt_hook, &default_bt_hook_sz, + (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); assert_ptr_not_null(p1, "Failed to allocate"); @@ -107,11 +107,11 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { expect_true(mock_bt_hook_called, "Didn't call mock hook"); prof_backtrace_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&current_hook, &current_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to 
default"); + (void *)&current_hook, &current_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -122,7 +122,6 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { TEST_END TEST_BEGIN(test_prof_backtrace_hook_augment) { - test_skip_if(!config_prof); mock_bt_hook_called = false; @@ -135,8 +134,9 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + (void *)&default_bt_hook, &default_bt_hook_sz, + (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); assert_ptr_not_null(p1, "Failed to allocate"); @@ -144,11 +144,11 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { expect_true(mock_bt_hook_called, "Didn't call mock hook"); prof_backtrace_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&current_hook, &current_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to default"); + (void *)&current_hook, &current_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -159,34 +159,36 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { TEST_END TEST_BEGIN(test_prof_dump_hook) { - test_skip_if(!config_prof); expect_u_eq(opt_prof_bt_max, 200, "Unexpected backtrace stack depth"); mock_dump_hook_called = false; expect_d_eq(mallctl("prof.dump", 
NULL, NULL, (void *)&dump_filename, - sizeof(dump_filename)), 0, "Failed to dump heap profile"); + sizeof(dump_filename)), + 0, "Failed to dump heap profile"); expect_false(mock_dump_hook_called, "Called dump hook before it's set"); - size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); + size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); prof_dump_hook_t hook = &mock_dump_hook; - expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + expect_d_eq( + mallctl("experimental.hooks.prof_dump", (void *)&default_bt_hook, + &default_bt_hook_sz, (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, - sizeof(dump_filename)), 0, "Failed to dump heap profile"); + sizeof(dump_filename)), + 0, "Failed to dump heap profile"); expect_true(mock_dump_hook_called, "Didn't call mock hook"); prof_dump_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_dump_hook_t); + size_t current_hook_sz = sizeof(prof_dump_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)&current_hook, &current_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to default"); + (void *)&current_hook, &current_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -195,12 +197,12 @@ TEST_END /* Need the do_write flag because NULL is a valid to_write value. 
*/ static void -read_write_prof_sample_hook(prof_sample_hook_t *to_read, bool do_write, - prof_sample_hook_t to_write) { +read_write_prof_sample_hook( + prof_sample_hook_t *to_read, bool do_write, prof_sample_hook_t to_write) { size_t hook_sz = sizeof(prof_sample_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_sample", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_sample_hook mallctl failure"); + expect_d_eq(mallctl("experimental.hooks.prof_sample", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_sample_hook mallctl failure"); } static void @@ -220,9 +222,10 @@ static void read_write_prof_sample_free_hook(prof_sample_free_hook_t *to_read, bool do_write, prof_sample_free_hook_t to_write) { size_t hook_sz = sizeof(prof_sample_free_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_sample_free", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_sample_free_hook mallctl failure"); + expect_d_eq( + mallctl("experimental.hooks.prof_sample_free", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_sample_free_hook mallctl failure"); } static void @@ -248,38 +251,40 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); expect_zu_eq(sampled_ptr_usz, 0, "Unexpected sampled ptr usize"); expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); - expect_zu_eq(free_sampled_ptr_sz, 0, - "Unexpected free sampled ptr size"); + expect_zu_eq( + free_sampled_ptr_sz, 0, "Unexpected free sampled ptr size"); prof_sample_hook_t curr_hook = read_prof_sample_hook(); expect_ptr_eq(curr_hook, sample_hook_set ? mock_prof_sample_hook : NULL, "Unexpected non NULL default hook"); prof_sample_free_hook_t curr_free_hook = read_prof_sample_free_hook(); - expect_ptr_eq(curr_free_hook, sample_free_hook_set ? 
- mock_prof_sample_free_hook : NULL, + expect_ptr_eq(curr_free_hook, + sample_free_hook_set ? mock_prof_sample_free_hook : NULL, "Unexpected non NULL default hook"); size_t alloc_sz = 10; size_t alloc_usz = 16; - void *p = mallocx(alloc_sz, 0); + void *p = mallocx(alloc_sz, 0); expect_ptr_not_null(p, "Failed to allocate"); expect_true(mock_prof_sample_hook_called == sample_hook_set, - "Incorrect prof_sample hook usage"); + "Incorrect prof_sample hook usage"); if (sample_hook_set) { expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); - expect_zu_eq(alloc_sz, sampled_ptr_sz, - "Unexpected sampled usize"); - expect_zu_eq(alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); + expect_zu_eq( + alloc_sz, sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq( + alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); } dallocx(p, 0); expect_true(mock_prof_sample_free_hook_called == sample_free_hook_set, - "Incorrect prof_sample_free hook usage"); + "Incorrect prof_sample_free hook usage"); if (sample_free_hook_set) { size_t usz = sz_s2u(alloc_sz); expect_ptr_eq(p, free_sampled_ptr, "Unexpected sampled ptr"); - expect_zu_eq(usz, free_sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq( + usz, free_sampled_ptr_sz, "Unexpected sampled usize"); } sampled_ptr = free_sampled_ptr = NULL; @@ -312,14 +317,14 @@ TEST_BEGIN(test_prof_sample_hooks) { check_prof_sample_hooks(true, false); prof_sample_free_hook_t sample_free_hook; - read_write_prof_sample_free_hook(&sample_free_hook, true, - mock_prof_sample_free_hook); + read_write_prof_sample_free_hook( + &sample_free_hook, true, mock_prof_sample_free_hook); expect_ptr_null(sample_free_hook, "Unexpected non NULL default hook"); check_prof_sample_hooks(true, true); read_write_prof_sample_hook(&sample_hook, true, NULL); - expect_ptr_eq(sample_hook, mock_prof_sample_hook, - "Unexpected prof_sample hook"); + expect_ptr_eq( + sample_hook, mock_prof_sample_hook, "Unexpected prof_sample hook"); 
check_prof_sample_hooks(false, true); read_write_prof_sample_free_hook(&sample_free_hook, true, NULL); @@ -331,9 +336,7 @@ TEST_END int main(void) { - return test( - test_prof_backtrace_hook_replace, - test_prof_backtrace_hook_augment, - test_prof_dump_hook, + return test(test_prof_backtrace_hook_replace, + test_prof_backtrace_hook_augment, test_prof_dump_hook, test_prof_sample_hooks); } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 455ac529..b16b4a1f 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -13,8 +13,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) { did_prof_dump_open = true; const char filename_prefix[] = TEST_PREFIX "."; - expect_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) - - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); + expect_d_eq( + strncmp(filename_prefix, filename, sizeof(filename_prefix) - 1), 0, + "Dump file name should start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); @@ -23,7 +24,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) { } TEST_BEGIN(test_idump) { - bool active; + bool active; void *p; const char *test_prefix = TEST_PREFIX; @@ -33,12 +34,12 @@ TEST_BEGIN(test_idump) { active = true; expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&test_prefix, - sizeof(test_prefix)), 0, - "Unexpected mallctl failure while overwriting dump prefix"); + sizeof(test_prefix)), + 0, "Unexpected mallctl failure while overwriting dump prefix"); - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -52,6 +53,5 @@ TEST_END int main(void) { - return test( - test_idump); + return 
test(test_idump); } diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index a32fdd0b..8cfc19ff 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,22 +4,25 @@ #define N_PARAM 100 #define N_THREADS 10 -static void expect_rep(void) { +static void +expect_rep(void) { expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void expect_log_empty(void) { - expect_zu_eq(prof_log_bt_count(), 0, - "The log has backtraces; it isn't empty"); - expect_zu_eq(prof_log_thr_count(), 0, - "The log has threads; it isn't empty"); +static void +expect_log_empty(void) { + expect_zu_eq( + prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); + expect_zu_eq( + prof_log_thr_count(), 0, "The log has threads; it isn't empty"); expect_zu_eq(prof_log_alloc_count(), 0, "The log has allocations; it isn't empty"); } void *buf[N_PARAM]; -static void f(void) { +static void +f(void) { int i; for (i = 0; i < N_PARAM; i++) { buf[i] = malloc(100); @@ -46,8 +49,8 @@ TEST_BEGIN(test_prof_log_many_logs) { f(); expect_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); expect_rep(); - expect_b_eq(prof_log_is_logging(), true, - "Logging should still be on"); + expect_b_eq( + prof_log_is_logging(), true, "Logging should still be on"); expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); expect_b_eq(prof_log_is_logging(), false, @@ -58,7 +61,8 @@ TEST_END thd_t thr_buf[N_THREADS]; -static void *f_thread(void *unused) { +static void * +f_thread(void *unused) { int i; for (i = 0; i < N_PARAM; i++) { void *p = malloc(100); @@ -70,7 +74,6 @@ static void *f_thread(void *unused) { } TEST_BEGIN(test_prof_log_many_threads) { - test_skip_if(!config_prof); int i; @@ -83,32 +86,34 @@ TEST_BEGIN(test_prof_log_many_threads) { for (i = 0; i < N_THREADS; i++) { thd_join(thr_buf[i], NULL); } - expect_zu_eq(prof_log_thr_count(), N_THREADS, - "Wrong number of thread entries"); + expect_zu_eq( + 
prof_log_thr_count(), N_THREADS, "Wrong number of thread entries"); expect_rep(); expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END -static void f3(void) { +static void +f3(void) { void *p = malloc(100); free(p); } -static void f1(void) { +static void +f1(void) { void *p = malloc(100); f3(); free(p); } -static void f2(void) { +static void +f2(void) { void *p = malloc(100); free(p); } TEST_BEGIN(test_prof_log_many_traces) { - test_skip_if(!config_prof); expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, @@ -144,8 +149,6 @@ main(void) { if (config_prof) { prof_log_dummy_set(true); } - return test_no_reentrancy( - test_prof_log_many_logs, - test_prof_log_many_traces, - test_prof_log_many_threads); + return test_no_reentrancy(test_prof_log_many_logs, + test_prof_log_many_traces, test_prof_log_many_threads); } diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index 0559339e..0200f92f 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/prof_sys.h" static const char *test_filename = "test_filename"; -static bool did_prof_dump_open; +static bool did_prof_dump_open; static int prof_dump_open_file_intercept(const char *filename, int mode) { @@ -35,8 +35,8 @@ TEST_BEGIN(test_mdump_normal) { prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), 0, - "Unexpected mallctl failure while dumping"); + sizeof(test_filename)), + 0, "Unexpected mallctl failure while dumping"); expect_true(did_prof_dump_open, "Expected a profile dump"); dallocx(p, 0); @@ -89,7 +89,8 @@ static void expect_write_failure(int count) { prof_dump_write_file_count = count; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), EFAULT, "Dump should err"); + sizeof(test_filename)), + EFAULT, "Dump 
should err"); expect_d_eq(prof_dump_write_file_count, 0, "Dumping stopped after a wrong number of writes"); } @@ -98,7 +99,7 @@ TEST_BEGIN(test_mdump_output_error) { test_skip_if(!config_prof); test_skip_if(!config_debug); - prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; prof_dump_write_file = prof_dump_write_file_error; @@ -168,9 +169,9 @@ TEST_BEGIN(test_mdump_maps_error) { test_skip_if(!config_debug); test_skip_if(prof_dump_open_maps == NULL); - prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; - prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; + prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; prof_dump_open_file = prof_dump_open_file_intercept; prof_dump_write_file = prof_dump_write_maps_file_error; @@ -186,8 +187,8 @@ TEST_BEGIN(test_mdump_maps_error) { started_piping_maps_file = false; prof_dump_write_file_count = 0; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), 0, - "mallctl should not fail in case of maps file opening failure"); + sizeof(test_filename)), + 0, "mallctl should not fail in case of maps file opening failure"); expect_false(started_piping_maps_file, "Shouldn't start piping maps"); expect_d_eq(prof_dump_write_file_count, 0, "Dumping stopped after a wrong number of writes"); @@ -211,7 +212,5 @@ TEST_END int main(void) { return test( - test_mdump_normal, - test_mdump_output_error, - test_mdump_maps_error); + test_mdump_normal, test_mdump_output_error, test_mdump_maps_error); } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 24ee6f42..b8fd0ca8 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -32,18 +32,20 @@ TEST_BEGIN(test_prof_recent_off) { 
test_skip_if(config_prof); const ssize_t past_ref = 0, future_ref = 0; - const size_t len_ref = sizeof(ssize_t); + const size_t len_ref = sizeof(ssize_t); ssize_t past = past_ref, future = future_ref; - size_t len = len_ref; + size_t len = len_ref; -#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ - d), ENOENT, "Should return ENOENT when config_prof is off");\ - assert_zd_eq(past, past_ref, "output was touched"); \ - assert_zu_eq(len, len_ref, "output length was touched"); \ - assert_zd_eq(future, future_ref, "input was touched"); \ -} while (0) +#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) \ + do { \ + assert_d_eq( \ + mallctl("experimental.prof_recent." opt, a, b, c, d), \ + ENOENT, "Should return ENOENT when config_prof is off"); \ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ + } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); ASSERT_SHOULD_FAIL("alloc_max", &past, &len, NULL, 0); @@ -58,40 +60,45 @@ TEST_BEGIN(test_prof_recent_on) { test_skip_if(!config_prof); ssize_t past, future; - size_t len = sizeof(ssize_t); + size_t len = sizeof(ssize_t); confirm_prof_setup(); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); + assert_d_eq( + mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), + 0, "no-op mallctl should be allowed"); confirm_prof_setup(); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, NULL, 0), 0, "Read error"); + assert_d_eq( + mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), + 0, "Read error"); expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, len), 0, "Write error"); + 
assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, len), + 0, "Write error"); future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), 0, "Read/write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + 0, "Read/write error"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), EINVAL, - "Invalid write should return EINVAL"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), 0, "Read/write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + 0, "Read/write error"); expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX + 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len * 2), EINVAL, - "Invalid write should return EINVAL"); - expect_zd_eq(past, -1, - "Output should not be touched given invalid write"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len * 2), + EINVAL, "Invalid write should return EINVAL"); + expect_zd_eq( + past, -1, "Output should not be touched given invalid write"); confirm_prof_setup(); } @@ -107,8 +114,8 @@ confirm_malloc(void *p) { assert_ptr_not_null(e, "NULL edata for living pointer"); prof_recent_t *n = edata_prof_recent_alloc_get_no_lock_test(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); 
expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock_test(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); @@ -116,17 +123,17 @@ confirm_malloc(void *p) { static void confirm_record_size(prof_recent_t *n, unsigned kth) { - expect_zu_eq(n->size, NTH_REQ_SIZE(kth), - "Recorded allocation size is wrong"); + expect_zu_eq( + n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void confirm_record_living(prof_recent_t *n) { - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); edata_t *edata = prof_recent_alloc_edata_get_no_lock_test(n); - assert_ptr_not_null(edata, - "Recorded edata should not be NULL for living pointer"); + assert_ptr_not_null( + edata, "Recorded edata should not be NULL for living pointer"); expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock_test(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); @@ -134,8 +141,8 @@ confirm_record_living(prof_recent_t *n) { static void confirm_record_released(prof_recent_t *n) { - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_null(prof_recent_alloc_edata_get_no_lock_test(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, @@ -145,12 +152,12 @@ confirm_record_released(prof_recent_t *n) { TEST_BEGIN(test_prof_recent_alloc) { test_skip_if(!config_prof); - bool b; - unsigned i, c; - size_t req_size; - void *p; + bool b; + unsigned i, c; + size_t req_size; + void *p; prof_recent_t *n; - ssize_t future; + ssize_t future; confirm_prof_setup(); @@ -175,7 +182,7 @@ TEST_BEGIN(test_prof_recent_alloc) { continue; } c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, 
&prof_recent_alloc_list, link) { ++c; confirm_record_size(n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { @@ -184,8 +191,8 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(n); } } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -204,13 +211,13 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -231,91 +238,96 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); confirm_malloc(p); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, /* Is the allocation from the third batch? */ - i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? - /* If yes, then it's just recorded. */ - i + c - OPT_ALLOC_MAX : - /* + i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX + ? + /* If yes, then it's just recorded. */ + i + c - OPT_ALLOC_MAX + : + /* * Otherwise, it should come from the first batch * instead of the second batch. */ - i + c - 2 * OPT_ALLOC_MAX); + i + c - 2 * OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { confirm_record_living(n); } else { confirm_record_released(n); } } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of records. 
*/ future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* * Decreasing the limit shouldn't alter the list of records as long as * the new limit is still no less than the length of the list. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* * Decreasing the limit should shorten the list of records if the new * limit is less than the length of the list. 
*/ future = OPT_ALLOC_MAX - 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); } - assert_u_eq(c, OPT_ALLOC_MAX - 1, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); } - assert_u_eq(c, OPT_ALLOC_MAX - 1, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. */ future = 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); n = ql_first(&prof_recent_alloc_list); confirm_record_size(n, 4 * OPT_ALLOC_MAX - 1); @@ -325,17 +337,19 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Completely turn off. 
*/ future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - assert_true(ql_empty(&prof_recent_alloc_list), - "Recent list should be empty"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); + assert_true( + ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); /* Restore the settings. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - assert_true(ql_empty(&prof_recent_alloc_list), - "Recent list should be empty"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); + assert_true( + ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); confirm_prof_setup(); } @@ -344,7 +358,7 @@ TEST_END #undef NTH_REQ_SIZE #define DUMP_OUT_SIZE 4096 -static char dump_out[DUMP_OUT_SIZE]; +static char dump_out[DUMP_OUT_SIZE]; static size_t dump_out_len = 0; static void @@ -359,14 +373,15 @@ static void call_dump(void) { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", - NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, + in, sizeof(in)), + 0, "Dump mallctl raised error"); } typedef struct { size_t size; size_t usize; - bool released; + bool released; } confirm_record_t; #define DUMP_ERROR "Dump output is wrong" @@ -375,7 +390,7 @@ static void confirm_record(const char *template, const confirm_record_t *records, const size_t n_records) { static const char *types[2] = {"alloc", "dalloc"}; - static char buf[64]; + static char buf[64]; /* * The template string would be in the form of: @@ -384,32 +399,35 @@ confirm_record(const char *template, const confirm_record_t *records, * 
"{...,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". */ - assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, - DUMP_ERROR); + assert_d_eq( + memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, - template + strlen(template) - 2, 2), 0, DUMP_ERROR); + template + strlen(template) - 2, 2), + 0, DUMP_ERROR); - const char *start = dump_out + strlen(template) - 2; - const char *end = dump_out + strlen(dump_out) - 2; + const char *start = dump_out + strlen(template) - 2; + const char *end = dump_out + strlen(dump_out) - 2; const confirm_record_t *record; for (record = records; record < records + n_records; ++record) { +#define ASSERT_CHAR(c) \ + do { \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ + } while (0) -#define ASSERT_CHAR(c) do { \ - assert_true(start < end, DUMP_ERROR); \ - assert_c_eq(*start++, c, DUMP_ERROR); \ -} while (0) +#define ASSERT_STR(s) \ + do { \ + const size_t len = strlen(s); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + start += len; \ + } while (0) -#define ASSERT_STR(s) do { \ - const size_t len = strlen(s); \ - assert_true(start + len <= end, DUMP_ERROR); \ - assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ - start += len; \ -} while (0) - -#define ASSERT_FORMATTED_STR(s, ...) do { \ - malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ - ASSERT_STR(buf); \ -} while (0) +#define ASSERT_FORMATTED_STR(s, ...) 
\ + do { \ + malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ + ASSERT_STR(buf); \ + } while (0) if (record != records) { ASSERT_CHAR(','); @@ -442,10 +460,10 @@ confirm_record(const char *template, const confirm_record_t *records, ASSERT_CHAR(','); if (thd_has_setname() && opt_prof_sys_thread_name) { - ASSERT_FORMATTED_STR("\"%s_thread_name\"", - *type); - ASSERT_FORMATTED_STR(":\"%s\",", - test_thread_name); + ASSERT_FORMATTED_STR( + "\"%s_thread_name\"", *type); + ASSERT_FORMATTED_STR( + ":\"%s\",", test_thread_name); } ASSERT_FORMATTED_STR("\"%s_time\"", *type); @@ -458,9 +476,9 @@ confirm_record(const char *template, const confirm_record_t *records, ASSERT_FORMATTED_STR("\"%s_trace\"", *type); ASSERT_CHAR(':'); ASSERT_CHAR('['); - while (isdigit(*start) || *start == 'x' || - (*start >= 'a' && *start <= 'f') || - *start == '\"' || *start == ',') { + while (isdigit(*start) || *start == 'x' + || (*start >= 'a' && *start <= 'f') + || *start == '\"' || *start == ',') { ++start; } ASSERT_CHAR(']'); @@ -483,7 +501,6 @@ confirm_record(const char *template, const confirm_record_t *records, #undef ASSERT_FORMATTED_STR #undef ASSERT_STR #undef ASSERT_CHAR - } assert_ptr_eq(record, records + n_records, DUMP_ERROR); assert_ptr_eq(start, end, DUMP_ERROR); @@ -495,25 +512,30 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { thd_setname(test_thread_name); confirm_prof_setup(); - ssize_t future; - void *p, *q; + ssize_t future; + void *p, *q; confirm_record_t records[2]; - assert_zu_eq(lg_prof_sample, (size_t)0, - "lg_prof_sample not set correctly"); + assert_zu_eq( + lg_prof_sample, (size_t)0, "lg_prof_sample not set correctly"); future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); call_dump(); - expect_str_eq(dump_out, "{\"sample_interval\":1," - 
"\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); + expect_str_eq(dump_out, + "{\"sample_interval\":1," + "\"recent_alloc_max\":0,\"recent_alloc\":[]}", + DUMP_ERROR); future = 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); call_dump(); - const char *template = "{\"sample_interval\":1," + const char *template = + "{\"sample_interval\":1," "\"recent_alloc_max\":2,\"recent_alloc\":[]}"; expect_str_eq(dump_out, template, DUMP_ERROR); @@ -542,8 +564,9 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); confirm_prof_setup(); } TEST_END @@ -558,14 +581,14 @@ TEST_END #define STRESS_ALLOC_MAX 4096 typedef struct { - thd_t thd; + thd_t thd; size_t id; - void *ptrs[N_PTRS]; + void *ptrs[N_PTRS]; size_t count; } thd_data_t; static thd_data_t thd_data[N_THREADS]; -static ssize_t test_max; +static ssize_t test_max; static void test_write_cb(void *cbopaque, const char *str) { @@ -575,11 +598,11 @@ test_write_cb(void *cbopaque, const char *str) { static void * f_thread(void *arg) { const size_t thd_id = *(size_t *)arg; - thd_data_t *data_p = thd_data + thd_id; + thd_data_t *data_p = thd_data + thd_id; assert(data_p->id == thd_id); data_p->count = 0; uint64_t rand = (uint64_t)thd_id; - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); assert(test_max > 1); ssize_t last_max = -1; for (int i = 0; i < N_ITERS; i++) { @@ -603,15 +626,15 @@ f_thread(void *arg) { } else if (rand % 5 == 1) { last_max = prof_recent_alloc_max_ctl_read(); } else if (rand % 5 == 2) { - last_max = - 
prof_recent_alloc_max_ctl_write(tsd, test_max * 2); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max * 2); } else if (rand % 5 == 3) { - last_max = - prof_recent_alloc_max_ctl_write(tsd, test_max); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max); } else { assert(rand % 5 == 4); - last_max = - prof_recent_alloc_max_ctl_write(tsd, test_max / 2); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max / 2); } assert_zd_ge(last_max, -1, "Illegal last-N max"); } @@ -640,8 +663,9 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &test_max, sizeof(ssize_t)), + 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; data_p->id = i; @@ -653,8 +677,9 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &test_max, sizeof(ssize_t)), + 0, "Write error"); confirm_prof_setup(); } TEST_END @@ -666,11 +691,7 @@ TEST_END int main(void) { - return test( - test_confirm_setup, - test_prof_recent_off, - test_prof_recent_on, - test_prof_recent_alloc, - test_prof_recent_alloc_dump, - test_prof_recent_stress); + return test(test_confirm_setup, test_prof_recent_off, + test_prof_recent_on, test_prof_recent_alloc, + test_prof_recent_alloc_dump, test_prof_recent_stress); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 9b33b205..0e64279e 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -15,8 +15,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) { static void set_prof_active(bool active) { - expect_d_eq(mallctl("prof.active", NULL, NULL, (void 
*)&active, - sizeof(active)), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure"); } static size_t @@ -32,25 +33,26 @@ get_lg_prof_sample(void) { static void do_prof_reset(size_t lg_prof_sample_input) { expect_d_eq(mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample_input, sizeof(size_t)), 0, - "Unexpected mallctl failure while resetting profile data"); + (void *)&lg_prof_sample_input, sizeof(size_t)), + 0, "Unexpected mallctl failure while resetting profile data"); expect_zu_eq(lg_prof_sample_input, get_lg_prof_sample(), "Expected profile sample rate change"); } TEST_BEGIN(test_prof_reset_basic) { - size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; - size_t sz; + size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; + size_t sz; unsigned i; test_skip_if(!config_prof); sz = sizeof(size_t); expect_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, - &sz, NULL, 0), 0, + &sz, NULL, 0), + 0, "Unexpected mallctl failure while reading profiling sample rate"); - expect_zu_eq(lg_prof_sample_orig, 0, - "Unexpected profiling sample rate"); + expect_zu_eq( + lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); lg_prof_sample_cur = get_lg_prof_sample(); expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " @@ -110,23 +112,24 @@ TEST_BEGIN(test_prof_reset_cleanup) { } TEST_END -#define NTHREADS 4 -#define NALLOCS_PER_THREAD (1U << 13) -#define OBJ_RING_BUF_COUNT 1531 -#define RESET_INTERVAL (1U << 10) -#define DUMP_INTERVAL 3677 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD (1U << 13) +#define OBJ_RING_BUF_COUNT 1531 +#define RESET_INTERVAL (1U << 10) +#define DUMP_INTERVAL 3677 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; unsigned i; - void *objs[OBJ_RING_BUF_COUNT]; + void *objs[OBJ_RING_BUF_COUNT]; memset(objs, 0, 
sizeof(objs)); for (i = 0; i < NALLOCS_PER_THREAD; i++) { if (i % RESET_INTERVAL == 0) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), - 0, "Unexpected error while resetting heap profile " + 0, + "Unexpected error while resetting heap profile " "data"); } @@ -141,9 +144,9 @@ thd_start(void *varg) { dallocx(*pp, 0); *pp = NULL; } - *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i); - expect_ptr_not_null(*pp, - "Unexpected btalloc() failure"); + *pp = btalloc(1, thd_ind * NALLOCS_PER_THREAD + i); + expect_ptr_not_null( + *pp, "Unexpected btalloc() failure"); } } @@ -160,17 +163,16 @@ thd_start(void *varg) { } TEST_BEGIN(test_prof_reset) { - size_t lg_prof_sample_orig; - thd_t thds[NTHREADS]; + size_t lg_prof_sample_orig; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; - size_t bt_count, tdata_count; + size_t bt_count, tdata_count; test_skip_if(!config_prof); bt_count = prof_bt_count(); - expect_zu_eq(bt_count, 0, - "Unexpected pre-existing tdata structures"); + expect_zu_eq(bt_count, 0, "Unexpected pre-existing tdata structures"); tdata_count = prof_tdata_count(); lg_prof_sample_orig = get_lg_prof_sample(); @@ -186,8 +188,8 @@ TEST_BEGIN(test_prof_reset) { thd_join(thds[i], NULL); } - expect_zu_eq(prof_bt_count(), bt_count, - "Unexpected bactrace count change"); + expect_zu_eq( + prof_bt_count(), bt_count, "Unexpected bactrace count change"); expect_zu_eq(prof_tdata_count(), tdata_count, "Unexpected remaining tdata structures"); @@ -205,9 +207,9 @@ TEST_END /* Test sampling at the same allocation site across resets. */ #define NITER 10 TEST_BEGIN(test_xallocx) { - size_t lg_prof_sample_orig; + size_t lg_prof_sample_orig; unsigned i; - void *ptrs[NITER]; + void *ptrs[NITER]; test_skip_if(!config_prof); @@ -218,7 +220,7 @@ TEST_BEGIN(test_xallocx) { do_prof_reset(0); for (i = 0; i < NITER; i++) { - void *p; + void *p; size_t sz, nsz; /* Reset profiling. */ @@ -233,13 +235,13 @@ TEST_BEGIN(test_xallocx) { /* Perform successful xallocx(). 
*/ sz = sallocx(p, 0); - expect_zu_eq(xallocx(p, sz, 0, 0), sz, - "Unexpected xallocx() failure"); + expect_zu_eq( + xallocx(p, sz, 0, 0), sz, "Unexpected xallocx() failure"); /* Perform unsuccessful xallocx(). */ - nsz = nallocx(sz+1, 0); - expect_zu_eq(xallocx(p, nsz, 0, 0), sz, - "Unexpected xallocx() success"); + nsz = nallocx(sz + 1, 0); + expect_zu_eq( + xallocx(p, nsz, 0, 0), sz, "Unexpected xallocx() success"); } for (i = 0; i < NITER; i++) { @@ -258,9 +260,6 @@ main(void) { /* Intercept dumping prior to running any tests. */ prof_dump_open_file = prof_dump_open_file_intercept; - return test_no_reentrancy( - test_prof_reset_basic, - test_prof_reset_cleanup, - test_prof_reset, - test_xallocx); + return test_no_reentrancy(test_prof_reset_basic, + test_prof_reset_cleanup, test_prof_reset, test_xallocx); } diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c index e3462c1f..993a83a7 100644 --- a/test/unit/prof_small.c +++ b/test/unit/prof_small.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" -static void assert_small_allocation_sampled(void *ptr, size_t size) { +static void +assert_small_allocation_sampled(void *ptr, size_t size) { assert_ptr_not_null(ptr, "Unexpected malloc failure"); assert_zu_le(size, SC_SMALL_MAXCLASS, "Unexpected large size class"); edata_t *edata = emap_edata_lookup(TSDN_NULL, &arena_emap_global, ptr); @@ -24,7 +25,7 @@ TEST_BEGIN(test_profile_small_allocations) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); free(ptr); } @@ -36,7 +37,7 @@ TEST_BEGIN(test_profile_small_allocations_sdallocx) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); /* * While free calls into ifree, sdallocx calls into isfree, @@ -86,7 +87,7 @@ 
TEST_BEGIN(test_profile_small_reallocations_same_size_class) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); ptr = realloc(ptr, size - 1); assert_small_allocation_sampled(ptr, size); diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index c88c4ae0..95ca051c 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -3,8 +3,8 @@ #define N_PTRS 3 static void -test_combinations(szind_t ind, size_t sizes_array[N_PTRS], - int flags_array[N_PTRS]) { +test_combinations( + szind_t ind, size_t sizes_array[N_PTRS], int flags_array[N_PTRS]) { #define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); @@ -25,11 +25,13 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], size_t stats_len = 2 * sizeof(uint64_t); uint64_t live_stats_orig[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats_orig, &stats_len, - NULL, 0), 0, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats_orig, &stats_len, NULL, 0), 0, + ""); uint64_t accum_stats_orig[2]; - assert_d_eq(mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, - NULL, 0), 0, ""); + assert_d_eq( + mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, NULL, 0), + 0, ""); void *ptrs[N_PTRS]; @@ -40,8 +42,8 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], for (size_t i = 0; i < N_PTRS; ++i) { size_t sz = sizes_array[i]; - int flags = flags_array[i]; - void *p = mallocx(sz, flags); + int flags = flags_array[i]; + void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); assert(TEST_MALLOC_SIZE(p) == sz_index2size(ind)); ptrs[i] = p; @@ -50,41 +52,45 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], accum_req_sum += sz; accum_count++; uint64_t live_stats[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(live_stats[0] - live_stats_orig[0], - live_req_sum, 
""); - expect_u64_eq(live_stats[1] - live_stats_orig[1], - live_count, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0), + 0, ""); + expect_u64_eq( + live_stats[0] - live_stats_orig[0], live_req_sum, ""); + expect_u64_eq( + live_stats[1] - live_stats_orig[1], live_count, ""); uint64_t accum_stats[2]; assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(accum_stats[0] - accum_stats_orig[0], - accum_req_sum, ""); - expect_u64_eq(accum_stats[1] - accum_stats_orig[1], - accum_count, ""); + NULL, 0), + 0, ""); + expect_u64_eq( + accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); + expect_u64_eq( + accum_stats[1] - accum_stats_orig[1], accum_count, ""); } for (size_t i = 0; i < N_PTRS; ++i) { size_t sz = sizes_array[i]; - int flags = flags_array[i]; + int flags = flags_array[i]; sdallocx(ptrs[i], sz, flags); live_req_sum -= sz; live_count--; uint64_t live_stats[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(live_stats[0] - live_stats_orig[0], - live_req_sum, ""); - expect_u64_eq(live_stats[1] - live_stats_orig[1], - live_count, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0), + 0, ""); + expect_u64_eq( + live_stats[0] - live_stats_orig[0], live_req_sum, ""); + expect_u64_eq( + live_stats[1] - live_stats_orig[1], live_count, ""); uint64_t accum_stats[2]; assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(accum_stats[0] - accum_stats_orig[0], - accum_req_sum, ""); - expect_u64_eq(accum_stats[1] - accum_stats_orig[1], - accum_count, ""); + NULL, 0), + 0, ""); + expect_u64_eq( + accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); + expect_u64_eq( + accum_stats[1] - accum_stats_orig[1], accum_count, ""); } #undef MALLCTL_STR_LEN } @@ -92,9 +98,9 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], static void test_szind_wrapper(szind_t 
ind) { size_t sizes_array[N_PTRS]; - int flags_array[N_PTRS]; + int flags_array[N_PTRS]; for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { + ++i, ++sz) { sizes_array[i] = sz; flags_array[i] = 0; } @@ -115,10 +121,10 @@ TEST_END static void test_szind_aligned_wrapper(szind_t ind, unsigned lg_align) { size_t sizes_array[N_PTRS]; - int flags_array[N_PTRS]; - int flags = MALLOCX_LG_ALIGN(lg_align); + int flags_array[N_PTRS]; + int flags = MALLOCX_LG_ALIGN(lg_align); for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { + ++i, ++sz) { sizes_array[i] = sz; flags_array[i] = flags; } @@ -136,7 +142,7 @@ TEST_BEGIN(test_prof_stats_aligned) { } for (szind_t ind = SC_NBINS - 5; ind < SC_NBINS + 5; ++ind) { for (unsigned lg_align = SC_LG_LARGE_MINCLASS - 5; - lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { + lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { test_szind_aligned_wrapper(ind, lg_align); } } @@ -145,7 +151,5 @@ TEST_END int main(void) { - return test( - test_prof_stats, - test_prof_stats_aligned); + return test(test_prof_stats, test_prof_stats_aligned); } diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index 3aeb8cf1..242e2fc3 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -28,7 +28,7 @@ TEST_BEGIN(test_prof_sys_thread_name) { test_skip_if(!config_prof); test_skip_if(!opt_prof_sys_thread_name); - bool oldval; + bool oldval; size_t sz = sizeof(oldval); assert_d_eq(mallctl("opt.prof_sys_thread_name", &oldval, &sz, NULL, 0), 0, "mallctl failed"); @@ -43,8 +43,8 @@ TEST_BEGIN(test_prof_sys_thread_name) { thread_name = test_thread_name; assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), ENOENT, "mallctl write for thread name should fail"); - assert_ptr_eq(thread_name, test_thread_name, - "Thread name should not be touched"); + assert_ptr_eq( + thread_name, test_thread_name, "Thread name should not be touched"); 
prof_sys_thread_name_read_t *orig_prof_sys_thread_name_read = prof_sys_thread_name_read; @@ -69,14 +69,15 @@ TEST_BEGIN(test_prof_sys_thread_name) { free(p); assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, "mallctl read for thread name should not fail"); - expect_str_eq(thread_name, "", "Thread name should be updated if the " + expect_str_eq(thread_name, "", + "Thread name should be updated if the " "system call returns a different name"); prof_sys_thread_name_read = orig_prof_sys_thread_name_read; } TEST_END -#define ITER (16*1024) +#define ITER (16 * 1024) static void * thd_start(void *unused) { /* Triggering samples which loads thread names. */ @@ -94,7 +95,7 @@ TEST_BEGIN(test_prof_sys_thread_name_mt) { test_skip_if(!opt_prof_sys_thread_name); #define NTHREADS 4 - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -105,8 +106,8 @@ TEST_BEGIN(test_prof_sys_thread_name_mt) { /* Prof dump which reads the thread names. */ for (i = 0; i < ITER; i++) { expect_d_eq(mallctl("prof.dump", NULL, NULL, - (void *)&dump_filename, sizeof(dump_filename)), 0, - "Unexpected mallctl failure while dumping"); + (void *)&dump_filename, sizeof(dump_filename)), + 0, "Unexpected mallctl failure while dumping"); } for (i = 0; i < NTHREADS; i++) { @@ -119,7 +120,5 @@ TEST_END int main(void) { - return test( - test_prof_sys_thread_name, - test_prof_sys_thread_name_mt); + return test(test_prof_sys_thread_name, test_prof_sys_thread_name_mt); } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index d19dd395..7fde7230 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -3,11 +3,11 @@ #include "jemalloc/internal/prof_data.h" TEST_BEGIN(test_prof_realloc) { - tsd_t *tsd; - int flags; - void *p, *q; + tsd_t *tsd; + int flags; + void *p, *q; prof_info_t prof_info_p, prof_info_q; - prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; + prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; test_skip_if(!config_prof); @@ -18,8 +18,8 @@ 
TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - expect_ptr_ne(prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, - "Expected valid tctx"); + expect_ptr_ne( + prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_1); expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, "Allocation should have increased sample size"); @@ -28,8 +28,8 @@ TEST_BEGIN(test_prof_realloc) { expect_ptr_ne(p, q, "Expected move"); expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - expect_ptr_ne(prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, - "Expected valid tctx"); + expect_ptr_ne( + prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_2); expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, "Reallocation should not have changed sample size"); @@ -43,6 +43,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prof_realloc); + return test_no_reentrancy(test_prof_realloc); } diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 0fc29f75..8b12c435 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -1,34 +1,34 @@ #include "test/jemalloc_test.h" static void -mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, - int line) { +mallctl_thread_name_get_impl( + const char *thread_name_expected, const char *func, int line) { const char *thread_name_old; - size_t sz; + size_t sz; sz = sizeof(thread_name_old); - expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, - NULL, 0), 0, - "%s():%d: Unexpected mallctl failure reading thread.prof.name", + expect_d_eq( + mallctl("thread.prof.name", (void *)&thread_name_old, &sz, NULL, 0), + 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); expect_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected 
thread.prof.name value", func, line); } static void -mallctl_thread_name_set_impl(const char *thread_name, const char *func, - int line) { +mallctl_thread_name_set_impl( + const char *thread_name, const char *func, int line) { expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), 0, - "%s():%d: Unexpected mallctl failure writing thread.prof.name", + (void *)&thread_name, sizeof(thread_name)), + 0, "%s():%d: Unexpected mallctl failure writing thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); } -#define mallctl_thread_name_get(a) \ +#define mallctl_thread_name_get(a) \ mallctl_thread_name_get_impl(a, __func__, __LINE__) -#define mallctl_thread_name_set(a) \ +#define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_thread_name_validation) { @@ -44,34 +44,35 @@ TEST_BEGIN(test_prof_thread_name_validation) { char long_name[] = "test case longer than expected; test case longer than expected"; expect_zu_gt(strlen(long_name), PROF_THREAD_NAME_MAX_LEN, - "Long test name not long enough"); + "Long test name not long enough"); const char *test_name_long = long_name; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name_long, sizeof(test_name_long)), 0, - "Unexpected mallctl failure from thread.prof.name"); + (void *)&test_name_long, sizeof(test_name_long)), + 0, "Unexpected mallctl failure from thread.prof.name"); /* Long name cut to match. */ long_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; mallctl_thread_name_get(test_name_long); /* NULL input shouldn't be allowed. 
*/ const char *test_name2 = NULL; - expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name2, sizeof(test_name2)), EINVAL, - "Unexpected mallctl result writing to thread.prof.name"); + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name2, + sizeof(test_name2)), + EINVAL, "Unexpected mallctl result writing to thread.prof.name"); /* '\n' shouldn't be allowed. */ const char *test_name3 = "test\ncase"; - expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name3, sizeof(test_name3)), EINVAL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name3, + sizeof(test_name3)), + EINVAL, "Unexpected mallctl result writing \"%s\" to thread.prof.name", test_name3); /* Simultaneous read/write shouldn't be allowed. */ const char *thread_name_old; - size_t sz = sizeof(thread_name_old); + size_t sz = sizeof(thread_name_old); expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, - (void *)&test_name1, sizeof(test_name1)), EPERM, - "Unexpected mallctl result from thread.prof.name"); + (void *)&test_name1, sizeof(test_name1)), + EPERM, "Unexpected mallctl result from thread.prof.name"); mallctl_thread_name_set(""); } @@ -80,7 +81,7 @@ TEST_END static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; - char thread_name[16] = ""; + char thread_name[16] = ""; unsigned i; malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind); @@ -107,7 +108,7 @@ TEST_BEGIN(test_prof_thread_name_threaded) { test_skip_if(opt_prof_sys_thread_name); #define NTHREADS 4 - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -125,6 +126,5 @@ TEST_END int main(void) { return test( - test_prof_thread_name_validation, - test_prof_thread_name_threaded); + test_prof_thread_name_validation, test_prof_thread_name_threaded); } diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index c6f53983..a31a5a24 100644 --- a/test/unit/prof_threshold.c +++ 
b/test/unit/prof_threshold.c @@ -23,9 +23,10 @@ static void read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, prof_threshold_hook_t to_write) { size_t hook_sz = sizeof(prof_threshold_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_threshold", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_threshold_hook mallctl failure"); + expect_d_eq( + mallctl("experimental.hooks.prof_threshold", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_threshold_hook mallctl failure"); } static void @@ -40,7 +41,8 @@ read_prof_threshold_hook() { return hook; } -static void reset_test_config() { +static void +reset_test_config() { hook_calls = 0; last_peak = 0; alloc_baseline = last_alloc; /* We run the test multiple times */ @@ -49,15 +51,20 @@ static void reset_test_config() { chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; } -static void expect_threshold_calls(int calls) { - expect_u64_eq(hook_calls, calls, "Hook called the right amount of times"); - expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); - expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); +static void +expect_threshold_calls(int calls) { + expect_u64_eq( + hook_calls, calls, "Hook called the right amount of times"); + expect_u64_lt( + last_peak, chunk_size * 2, "We allocate chunk_size at a time"); + expect_u64_ge( + last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); } -static void allocate_chunks(int chunks) { +static void +allocate_chunks(int chunks) { for (int i = 0; i < chunks; i++) { - void* p = mallocx((size_t)chunk_size, 0); + void *p = mallocx((size_t)chunk_size, 0); expect_ptr_not_null(p, "Failed to allocate"); free(p); } @@ -68,7 +75,8 @@ TEST_BEGIN(test_prof_threshold_hook) { /* Test setting and reading the hook (both value and null) */ write_prof_threshold_hook(mock_prof_threshold_hook); - 
expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); + expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, + "Unexpected hook"); write_prof_threshold_hook(NULL); expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); @@ -100,6 +108,5 @@ TEST_END int main(void) { - return test( - test_prof_threshold_hook); + return test(test_prof_threshold_hook); } diff --git a/test/unit/psset.c b/test/unit/psset.c index c834e531..73a9835a 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -21,8 +21,8 @@ test_psset_fake_purge(hpdata_t *ps) { hpdata_alloc_allowed_set(ps, false); size_t nranges; hpdata_purge_begin(ps, &purge_state, &nranges); - (void) nranges; - void *addr; + (void)nranges; + void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { } @@ -31,8 +31,8 @@ test_psset_fake_purge(hpdata_t *ps) { } static void -test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, - size_t size) { +test_psset_alloc_new( + psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); test_psset_fake_purge(ps); @@ -40,12 +40,12 @@ test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, psset_insert(psset, ps); psset_update_begin(psset, ps); - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); psset_update_end(psset, ps); } @@ -104,15 +104,14 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { * Note that allocations should get the arena ind of their home * arena, *not* the arena ind of the 
pageslab allocator. */ - expect_u_eq(ALLOC_ARENA_IND, edata_arena_ind_get(edata), - "Arena ind changed"); + expect_u_eq( + ALLOC_ARENA_IND, edata_arena_ind_get(edata), "Arena ind changed"); expect_ptr_eq( (void *)((uintptr_t)PAGESLAB_ADDR + (page_offset << LG_PAGE)), edata_addr_get(edata), "Didn't allocate in order"); expect_zu_eq(page_cnt << LG_PAGE, edata_size_get(edata), ""); expect_false(edata_slab_get(edata), ""); - expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), - ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); expect_u64_eq(0, edata_sn_get(edata), ""); expect_d_eq(edata_state_get(edata), extent_state_active, ""); expect_false(edata_zeroed_get(edata), ""); @@ -123,7 +122,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ -176,7 +175,7 @@ TEST_END TEST_BEGIN(test_reuse) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab; @@ -196,7 +195,7 @@ TEST_BEGIN(test_reuse) { } /* Free odd indices. 
*/ - for (size_t i = 0; i < HUGEPAGE_PAGES; i ++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 2 == 0) { continue; } @@ -271,7 +270,7 @@ TEST_END TEST_BEGIN(test_evict) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab; @@ -308,16 +307,15 @@ TEST_END TEST_BEGIN(test_multi_pageslab) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab[2]; hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); - hpdata_init(&pageslab[1], - (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), + hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), PAGESLAB_AGE + 1); - edata_t* alloc[2]; + edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); alloc[1] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -334,9 +332,10 @@ TEST_BEGIN(test_multi_pageslab) { for (size_t i = 0; i < 2; i++) { for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); - err = test_psset_alloc_reuse(&psset, &alloc[i][j], PAGE); - expect_false(err, - "Nonempty psset failed page allocation."); + err = test_psset_alloc_reuse( + &psset, &alloc[i][j], PAGE); + expect_false( + err, "Nonempty psset failed page allocation."); assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), "Didn't pick pageslabs in first-fit"); } @@ -505,7 +504,8 @@ TEST_BEGIN(test_stats_huge) { expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); - expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); + expect_zu_eq( + HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); @@ -527,7 +527,8 @@ static void stats_expect_empty(psset_bin_stats_t *stats) { assert_zu_eq(0, stats->npageslabs, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive, "Unexpected nonempty 
bin" + expect_zu_eq(0, stats->nactive, + "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); } @@ -536,17 +537,16 @@ stats_expect(psset_t *psset, size_t nactive) { if (nactive == HUGEPAGE_PAGES) { expect_zu_eq(1, psset->stats.full_slabs[0].npageslabs, "Expected a full slab"); - expect_zu_eq(HUGEPAGE_PAGES, - psset->stats.full_slabs[0].nactive, + expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); } else { stats_expect_empty(&psset->stats.full_slabs[0]); } - size_t ninactive = HUGEPAGE_PAGES - nactive; + size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; if (ninactive != 0 && ninactive < HUGEPAGE_PAGES) { - nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( - ninactive << LG_PAGE)); + nonempty_pind = sz_psz2ind( + sz_psz_quantize_floor(ninactive << LG_PAGE)); } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { @@ -657,24 +657,25 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, } /* Deallocate the last page from the older pageslab. */ - hpdata_t *evicted = test_psset_dalloc(psset, - &alloc[HUGEPAGE_PAGES - 1]); + hpdata_t *evicted = test_psset_dalloc( + psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } TEST_BEGIN(test_oldest_fit) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); - edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc( + sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; psset_t psset; - init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, - worse_alloc); + init_test_pageslabs( + &psset, &pageslab, &worse_pageslab, alloc, worse_alloc); /* The edata should come from the better pageslab. 
*/ edata_t test_edata; @@ -691,23 +692,24 @@ TEST_END TEST_BEGIN(test_insert_remove) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; - edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); - edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc( + sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; psset_t psset; - init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, - worse_alloc); + init_test_pageslabs( + &psset, &pageslab, &worse_pageslab, alloc, worse_alloc); /* Remove better; should still be able to alloc from worse. */ psset_update_begin(&psset, &pageslab); - err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], - PAGE); + err = test_psset_alloc_reuse( + &psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), @@ -755,23 +757,21 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { psset_t psset; psset_init(&psset); - hpdata_t hpdata_huge[NHP]; + hpdata_t hpdata_huge[NHP]; uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0]; uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP]; - hpdata_t hpdata_nonhuge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; for (size_t i = 0; i < NHP; i++) { - hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), - 123 + i); + hpdata_init( + &hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), 123 + i); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), - 456 + i); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i); psset_insert(&psset, &hpdata_nonhuge[i]); - } for (int i = 0; i < 2 * NHP; i++) { hpdata = 
psset_pick_alloc(&psset, HUGEPAGE * 3 / 4); @@ -804,7 +804,8 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { for (int i = 0; i < NHP; i++) { hpdata = psset_pick_purge(&psset); assert_true(nonhuge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < nonhuge_end, ""); + && (uintptr_t)hpdata < nonhuge_end, + ""); psset_update_begin(&psset, hpdata); test_psset_fake_purge(hpdata); hpdata_purge_allowed_set(hpdata, false); @@ -813,7 +814,8 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { for (int i = 0; i < NHP; i++) { hpdata = psset_pick_purge(&psset); expect_true(huge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < huge_end, ""); + && (uintptr_t)hpdata < huge_end, + ""); psset_update_begin(&psset, hpdata); hpdata_dehugify(hpdata); test_psset_fake_purge(hpdata); @@ -867,13 +869,13 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { psset_t psset; psset_init(&psset); - enum {NHP = 10 }; + enum { NHP = 10 }; hpdata_t hpdata_huge[NHP]; hpdata_t hpdata_nonhuge[NHP]; uintptr_t cur_addr = 100 * HUGEPAGE; - uint64_t cur_age = 123; + uint64_t cur_age = 123; for (int i = 0; i < NHP; i++) { hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); cur_addr += HUGEPAGE; @@ -933,18 +935,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_empty, - test_fill, - test_reuse, - test_evict, - test_multi_pageslab, - test_stats_merged, - test_stats_huge, - test_stats_fullness, - test_oldest_fit, - test_insert_remove, - test_purge_prefers_nonhuge, - test_purge_prefers_empty, + return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict, + test_multi_pageslab, test_stats_merged, test_stats_huge, + test_stats_fullness, test_oldest_fit, test_insert_remove, + test_purge_prefers_nonhuge, test_purge_prefers_empty, test_purge_prefers_empty_huge); } diff --git a/test/unit/ql.c b/test/unit/ql.c index f9130582..ff3b436e 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -15,16 +15,16 @@ struct list_s { static void test_empty_list(list_head_t *head) { - list_t *t; + list_t *t; unsigned 
i; expect_true(ql_empty(head), "Unexpected element for empty list"); expect_ptr_null(ql_first(head), "Unexpected element for empty list"); - expect_ptr_null(ql_last(head, link), - "Unexpected element for empty list"); + expect_ptr_null( + ql_last(head, link), "Unexpected element for empty list"); i = 0; - ql_foreach(t, head, link) { + ql_foreach (t, head, link) { i++; } expect_u_eq(i, 0, "Unexpected element for empty list"); @@ -56,48 +56,48 @@ init_entries(list_t *entries, unsigned nentries) { static void test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { - list_t *t; + list_t *t; unsigned i; expect_false(ql_empty(head), "List should not be empty"); expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); - expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, + expect_c_eq(ql_last(head, link)->id, entries[nentries - 1].id, "Element id mismatch"); i = 0; - ql_foreach(t, head, link) { + ql_foreach (t, head, link) { expect_c_eq(t->id, entries[i].id, "Element id mismatch"); i++; } i = 0; ql_reverse_foreach(t, head, link) { - expect_c_eq(t->id, entries[nentries-i-1].id, - "Element id mismatch"); + expect_c_eq( + t->id, entries[nentries - i - 1].id, "Element id mismatch"); i++; } - for (i = 0; i < nentries-1; i++) { + for (i = 0; i < nentries - 1; i++) { t = ql_next(head, &entries[i], link); - expect_c_eq(t->id, entries[i+1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i + 1].id, "Element id mismatch"); } - expect_ptr_null(ql_next(head, &entries[nentries-1], link), - "Unexpected element"); + expect_ptr_null( + ql_next(head, &entries[nentries - 1], link), "Unexpected element"); expect_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); for (i = 1; i < nentries; i++) { t = ql_prev(head, &entries[i], link); - expect_c_eq(t->id, entries[i-1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i - 1].id, "Element id mismatch"); } } TEST_BEGIN(test_ql_tail_insert) { list_head_t head; - list_t 
entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } @@ -108,17 +108,17 @@ TEST_END TEST_BEGIN(test_ql_tail_remove) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } for (i = 0; i < NENTRIES; i++) { - test_entries_list(&head, entries, NENTRIES-i); + test_entries_list(&head, entries, NENTRIES - i); ql_tail_remove(&head, list_t, link); } test_empty_list(&head); @@ -127,13 +127,13 @@ TEST_END TEST_BEGIN(test_ql_head_insert) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { - ql_head_insert(&head, &entries[NENTRIES-i-1], link); + ql_head_insert(&head, &entries[NENTRIES - i - 1], link); } test_entries_list(&head, entries, NENTRIES); @@ -142,17 +142,17 @@ TEST_END TEST_BEGIN(test_ql_head_remove) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { - ql_head_insert(&head, &entries[NENTRIES-i-1], link); + ql_head_insert(&head, &entries[NENTRIES - i - 1], link); } for (i = 0; i < NENTRIES; i++) { - test_entries_list(&head, &entries[i], NENTRIES-i); + test_entries_list(&head, &entries[i], NENTRIES - i); ql_head_remove(&head, list_t, link); } test_empty_list(&head); @@ 
-161,11 +161,11 @@ TEST_END TEST_BEGIN(test_ql_insert) { list_head_t head; - list_t entries[8]; - list_t *a, *b, *c, *d, *e, *f, *g, *h; + list_t entries[8]; + list_t *a, *b, *c, *d, *e, *f, *g, *h; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); a = &entries[0]; b = &entries[1]; c = &entries[2]; @@ -190,13 +190,13 @@ TEST_BEGIN(test_ql_insert) { ql_after_insert(c, d, link); ql_before_insert(&head, f, e, link); - test_entries_list(&head, entries, sizeof(entries)/sizeof(list_t)); + test_entries_list(&head, entries, sizeof(entries) / sizeof(list_t)); } TEST_END static void -test_concat_split_entries(list_t *entries, unsigned nentries_a, - unsigned nentries_b) { +test_concat_split_entries( + list_t *entries, unsigned nentries_a, unsigned nentries_b) { init_entries(entries, nentries_a + nentries_b); list_head_t head_a; @@ -253,8 +253,8 @@ TEST_BEGIN(test_ql_concat_split) { test_concat_split_entries(entries, 0, NENTRIES); test_concat_split_entries(entries, 1, NENTRIES - 1); - test_concat_split_entries(entries, NENTRIES / 2, - NENTRIES - NENTRIES / 2); + test_concat_split_entries( + entries, NENTRIES / 2, NENTRIES - NENTRIES / 2); test_concat_split_entries(entries, NENTRIES - 1, 1); test_concat_split_entries(entries, NENTRIES, 0); } @@ -262,11 +262,11 @@ TEST_END TEST_BEGIN(test_ql_rotate) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } @@ -284,15 +284,15 @@ TEST_END TEST_BEGIN(test_ql_move) { list_head_t head_dest, head_src; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head_src); ql_move(&head_dest, &head_src); test_empty_list(&head_src); test_empty_list(&head_dest); - 
init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head_src, &entries[i], link); } @@ -304,14 +304,7 @@ TEST_END int main(void) { - return test( - test_ql_empty, - test_ql_tail_insert, - test_ql_tail_remove, - test_ql_head_insert, - test_ql_head_remove, - test_ql_insert, - test_ql_concat_split, - test_ql_rotate, - test_ql_move); + return test(test_ql_empty, test_ql_tail_insert, test_ql_tail_remove, + test_ql_head_insert, test_ql_head_remove, test_ql_insert, + test_ql_concat_split, test_ql_rotate, test_ql_move); } diff --git a/test/unit/qr.c b/test/unit/qr.c index 16eed0e9..3d8b164b 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -26,12 +26,12 @@ init_entries(ring_t *entries) { static void test_independent_entries(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { + qr_foreach (t, &entries[i], link) { j++; } expect_u_eq(j, 1, @@ -71,13 +71,13 @@ TEST_END static void test_entries_ring(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(i+j) % NENTRIES].id, + qr_foreach (t, &entries[i], link) { + expect_c_eq(t->id, entries[(i + j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -85,25 +85,26 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(NENTRIES+i-j-1) % - NENTRIES].id, "Element id mismatch"); + expect_c_eq(t->id, + entries[(NENTRIES + i - j - 1) % NENTRIES].id, + "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], 
link); - expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id, "Element id mismatch"); } } TEST_BEGIN(test_qr_after_insert) { - ring_t entries[NENTRIES]; + ring_t entries[NENTRIES]; unsigned i; init_entries(entries); @@ -115,8 +116,8 @@ TEST_BEGIN(test_qr_after_insert) { TEST_END TEST_BEGIN(test_qr_remove) { - ring_t entries[NENTRIES]; - ring_t *t; + ring_t entries[NENTRIES]; + ring_t *t; unsigned i, j; init_entries(entries); @@ -126,15 +127,15 @@ TEST_BEGIN(test_qr_remove) { for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[i+j].id, - "Element id mismatch"); + qr_foreach (t, &entries[i], link) { + expect_c_eq( + t->id, entries[i + j].id, "Element id mismatch"); j++; } j = 0; qr_reverse_foreach(t, &entries[i], link) { expect_c_eq(t->id, entries[NENTRIES - 1 - j].id, - "Element id mismatch"); + "Element id mismatch"); j++; } qr_remove(&entries[i], link); @@ -144,8 +145,8 @@ TEST_BEGIN(test_qr_remove) { TEST_END TEST_BEGIN(test_qr_before_insert) { - ring_t entries[NENTRIES]; - ring_t *t; + ring_t entries[NENTRIES]; + ring_t *t; unsigned i, j; init_entries(entries); @@ -154,28 +155,29 @@ TEST_BEGIN(test_qr_before_insert) { } for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(NENTRIES+i-j) % - NENTRIES].id, "Element id mismatch"); + qr_foreach (t, &entries[i], link) { + expect_c_eq(t->id, + entries[(NENTRIES + i - j) % NENTRIES].id, + "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + j + 1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id, "Element id 
mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id, "Element id mismatch"); } } @@ -183,19 +185,22 @@ TEST_END static void test_split_entries(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { + qr_foreach (t, &entries[i], link) { if (i < SPLIT_INDEX) { expect_c_eq(t->id, - entries[(i+j) % SPLIT_INDEX].id, + entries[(i + j) % SPLIT_INDEX].id, "Element id mismatch"); } else { - expect_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % - (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id, + expect_c_eq(t->id, + entries[(i + j - SPLIT_INDEX) + % (NENTRIES - SPLIT_INDEX) + + SPLIT_INDEX] + .id, "Element id mismatch"); } j++; @@ -204,7 +209,7 @@ test_split_entries(ring_t *entries) { } TEST_BEGIN(test_qr_meld_split) { - ring_t entries[NENTRIES]; + ring_t entries[NENTRIES]; unsigned i; init_entries(entries); @@ -234,10 +239,6 @@ TEST_END int main(void) { - return test( - test_qr_one, - test_qr_after_insert, - test_qr_remove, - test_qr_before_insert, - test_qr_meld_split); + return test(test_qr_one, test_qr_after_insert, test_qr_remove, + test_qr_before_insert, test_qr_meld_split); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 827ec510..790593e3 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -4,16 +4,17 @@ #include "jemalloc/internal/rb.h" -#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ - a_type *rbp_bh_t; \ - for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \ - NULL; rbp_bh_t = rbtn_left_get(a_type, a_field, \ - rbp_bh_t)) { \ - if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ - (r_height)++; \ - } \ - } \ -} while (0) +#define rbtn_black_height(a_type, a_field, a_rbt, r_height) \ + do { \ + a_type *rbp_bh_t; \ + for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ + rbp_bh_t != NULL; \ + rbp_bh_t = rbtn_left_get(a_type, a_field, 
rbp_bh_t)) { \ + if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ + (r_height)++; \ + } \ + } \ + } while (0) static bool summarize_always_returns_true = false; @@ -55,7 +56,7 @@ struct node_s { */ const node_t *summary_lchild; const node_t *summary_rchild; - uint64_t summary_max_specialness; + uint64_t summary_max_specialness; }; static int @@ -80,8 +81,8 @@ node_cmp(const node_t *a, const node_t *b) { } static uint64_t -node_subtree_specialness(node_t *n, const node_t *lchild, - const node_t *rchild) { +node_subtree_specialness( + node_t *n, const node_t *lchild, const node_t *rchild) { uint64_t subtree_specialness = n->specialness; if (lchild != NULL && lchild->summary_max_specialness > subtree_specialness) { @@ -109,8 +110,8 @@ node_summarize(node_t *a, const node_t *lchild, const node_t *rchild) { typedef rb_tree(node_t) tree_t; rb_summarized_proto(static, tree_, tree_t, node_t); -rb_summarized_gen(static, tree_, tree_t, node_t, link, node_cmp, - node_summarize); +rb_summarized_gen( + static, tree_, tree_t, node_t, link, node_cmp, node_summarize); static bool specialness_filter_node(void *ctx, node_t *node) { @@ -127,24 +128,24 @@ specialness_filter_subtree(void *ctx, node_t *node) { static node_t * tree_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; - node_t *search_node; + node_t *search_node; expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Test rb_search(). */ search_node = tree_search(tree, node); - expect_ptr_eq(search_node, node, - "tree_search() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_search() returned unexpected node"); /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_nsearch() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_nsearch() returned unexpected node"); /* Test rb_psearch(). 
*/ search_node = tree_psearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_psearch() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_psearch() returned unexpected node"); (*i)++; @@ -174,38 +175,44 @@ TEST_BEGIN(test_rb_empty) { expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); unsigned nodes = 0; - tree_iter_filtered(&tree, NULL, &tree_iterate_cb, - &nodes, &specialness_filter_node, &specialness_filter_subtree, - NULL); + tree_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes, + &specialness_filter_node, &specialness_filter_subtree, NULL); expect_u_eq(0, nodes, ""); nodes = 0; - tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, - &nodes, &specialness_filter_node, &specialness_filter_subtree, - NULL); + tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes, + &specialness_filter_node, &specialness_filter_subtree, NULL); expect_u_eq(0, nodes, ""); expect_ptr_null(tree_first_filtered(&tree, &specialness_filter_node, - &specialness_filter_subtree, NULL), ""); + &specialness_filter_subtree, NULL), + ""); expect_ptr_null(tree_last_filtered(&tree, &specialness_filter_node, - &specialness_filter_subtree, NULL), ""); + &specialness_filter_subtree, NULL), + ""); key.key = 0; key.magic = NODE_MAGIC; - expect_ptr_null(tree_search_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); - expect_ptr_null(tree_nsearch_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); - expect_ptr_null(tree_psearch_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null( + tree_search_filtered(&tree, &key, &specialness_filter_node, + &specialness_filter_subtree, NULL), + ""); + expect_ptr_null( + tree_nsearch_filtered(&tree, &key, &specialness_filter_node, + &specialness_filter_subtree, NULL), + ""); + expect_ptr_null( + tree_psearch_filtered(&tree, &key, &specialness_filter_node, + 
&specialness_filter_subtree, NULL), + ""); } TEST_END static unsigned tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; - node_t *left_node; - node_t *right_node; + node_t *left_node; + node_t *right_node; if (node == NULL) { return ret; @@ -214,13 +221,13 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { left_node = rbtn_left_get(node_t, link, node); right_node = rbtn_right_get(node_t, link, node); - expect_ptr_eq(left_node, node->summary_lchild, - "summary missed a tree update"); - expect_ptr_eq(right_node, node->summary_rchild, - "summary missed a tree update"); + expect_ptr_eq( + left_node, node->summary_lchild, "summary missed a tree update"); + expect_ptr_eq( + right_node, node->summary_rchild, "summary missed a tree update"); - uint64_t expected_subtree_specialness = node_subtree_specialness(node, - left_node, right_node); + uint64_t expected_subtree_specialness = node_subtree_specialness( + node, left_node, right_node); expect_u64_eq(expected_subtree_specialness, node->summary_max_specialness, "Incorrect summary"); @@ -232,7 +239,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { if (rbtn_red_get(node_t, link, node)) { if (left_node != NULL) { expect_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); + "Node should be black"); } if (right_node != NULL) { expect_false(rbtn_red_get(node_t, link, right_node), @@ -282,7 +289,7 @@ tree_iterate_reverse(tree_t *tree) { static void node_remove(tree_t *tree, node_t *node, unsigned nnodes) { - node_t *search_node; + node_t *search_node; unsigned black_height, imbalances; tree_remove(tree, node); @@ -290,15 +297,15 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { /* Test rb_nsearch(). 
*/ search_node = tree_nsearch(tree, node); if (search_node != NULL) { - expect_u64_ge(search_node->key, node->key, - "Key ordering error"); + expect_u64_ge( + search_node->key, node->key, "Key ordering error"); } /* Test rb_psearch(). */ search_node = tree_psearch(tree, node); if (search_node != NULL) { - expect_u64_le(search_node->key, node->key, - "Key ordering error"); + expect_u64_le( + search_node->key, node->key, "Key ordering error"); } node->magic = 0; @@ -306,16 +313,16 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { rbtn_black_height(node_t, link, tree, black_height); imbalances = tree_recurse(tree->rbt_root, black_height, 0); expect_u_eq(imbalances, 0, "Tree is unbalanced"); - expect_u_eq(tree_iterate(tree), nnodes-1, - "Unexpected node iteration count"); - expect_u_eq(tree_iterate_reverse(tree), nnodes-1, + expect_u_eq( + tree_iterate(tree), nnodes - 1, "Unexpected node iteration count"); + expect_u_eq(tree_iterate_reverse(tree), nnodes - 1, "Unexpected node iteration count"); } static node_t * remove_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - node_t *ret = tree_next(tree, node); + node_t *ret = tree_next(tree, node); node_remove(tree, node, *nnodes); @@ -325,7 +332,7 @@ remove_iterate_cb(tree_t *tree, node_t *node, void *data) { static node_t * remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - node_t *ret = tree_prev(tree, node); + node_t *ret = tree_prev(tree, node); node_remove(tree, node, *nnodes); @@ -341,15 +348,11 @@ destroy_cb(node_t *node, void *data) { } TEST_BEGIN(test_rb_random) { - enum { - NNODES = 25, - NBAGS = 500, - SEED = 42 - }; - sfmt_t *sfmt; + enum { NNODES = 25, NBAGS = 500, SEED = 42 }; + sfmt_t *sfmt; uint64_t bag[NNODES]; - tree_t tree; - node_t nodes[NNODES]; + tree_t tree; + node_t nodes[NNODES]; unsigned i, j, k, black_height, imbalances; sfmt = init_gen_rand(SEED); @@ -386,8 +389,8 @@ 
TEST_BEGIN(test_rb_random) { for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; - nodes[k].specialness = gen_rand64_range(sfmt, - NNODES); + nodes[k].specialness = gen_rand64_range( + sfmt, NNODES); nodes[k].mid_remove = false; nodes[k].allow_duplicates = false; nodes[k].summary_lchild = NULL; @@ -399,16 +402,16 @@ TEST_BEGIN(test_rb_random) { for (k = 0; k < j; k++) { tree_insert(&tree, &nodes[k]); - rbtn_black_height(node_t, link, &tree, - black_height); - imbalances = tree_recurse(tree.rbt_root, - black_height, 0); - expect_u_eq(imbalances, 0, - "Tree is unbalanced"); + rbtn_black_height( + node_t, link, &tree, black_height); + imbalances = tree_recurse( + tree.rbt_root, black_height, 0); + expect_u_eq( + imbalances, 0, "Tree is unbalanced"); - expect_u_eq(tree_iterate(&tree), k+1, + expect_u_eq(tree_iterate(&tree), k + 1, "Unexpected node iteration count"); - expect_u_eq(tree_iterate_reverse(&tree), k+1, + expect_u_eq(tree_iterate_reverse(&tree), k + 1, "Unexpected node iteration count"); expect_false(tree_empty(&tree), @@ -431,11 +434,11 @@ TEST_BEGIN(test_rb_random) { break; case 1: for (k = j; k > 0; k--) { - node_remove(&tree, &nodes[k-1], k); + node_remove(&tree, &nodes[k - 1], k); } break; case 2: { - node_t *start; + node_t *start; unsigned nnodes = j; start = NULL; @@ -444,11 +447,12 @@ TEST_BEGIN(test_rb_random) { remove_iterate_cb, (void *)&nnodes); nnodes--; } while (start != NULL); - expect_u_eq(nnodes, 0, - "Removal terminated early"); + expect_u_eq( + nnodes, 0, "Removal terminated early"); break; - } case 3: { - node_t *start; + } + case 3: { + node_t *start; unsigned nnodes = j; start = NULL; @@ -458,16 +462,18 @@ TEST_BEGIN(test_rb_random) { (void *)&nnodes); nnodes--; } while (start != NULL); - expect_u_eq(nnodes, 0, - "Removal terminated early"); + expect_u_eq( + nnodes, 0, "Removal terminated early"); break; - } case 4: { + } + case 4: { unsigned nnodes = j; tree_destroy(&tree, destroy_cb, &nnodes); - 
expect_u_eq(nnodes, 0, - "Destruction terminated early"); + expect_u_eq( + nnodes, 0, "Destruction terminated early"); break; - } default: + } + default: not_reached(); } } @@ -479,7 +485,7 @@ TEST_END static void expect_simple_consistency(tree_t *tree, uint64_t specialness, bool expected_empty, node_t *expected_first, node_t *expected_last) { - bool empty; + bool empty; node_t *first; node_t *last; @@ -487,19 +493,17 @@ expect_simple_consistency(tree_t *tree, uint64_t specialness, &specialness_filter_subtree, &specialness); expect_b_eq(expected_empty, empty, ""); - first = tree_first_filtered(tree, - &specialness_filter_node, &specialness_filter_subtree, - (void *)&specialness); + first = tree_first_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, (void *)&specialness); expect_ptr_eq(expected_first, first, ""); - last = tree_last_filtered(tree, - &specialness_filter_node, &specialness_filter_subtree, - (void *)&specialness); + last = tree_last_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, (void *)&specialness); expect_ptr_eq(expected_last, last, ""); } TEST_BEGIN(test_rb_filter_simple) { - enum {FILTER_NODES = 10}; + enum { FILTER_NODES = 10 }; node_t nodes[FILTER_NODES]; for (unsigned i = 0; i < FILTER_NODES; i++) { nodes[i].magic = NODE_MAGIC; @@ -583,10 +587,10 @@ TEST_END typedef struct iter_ctx_s iter_ctx_t; struct iter_ctx_s { - int ncalls; + int ncalls; node_t *last_node; - int ncalls_max; + int ncalls_max; bool forward; }; @@ -624,8 +628,8 @@ static void check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { uint64_t specialness = 1; - bool empty; - bool real_empty = true; + bool empty; + bool real_empty = true; node_t *first; node_t *real_first = NULL; node_t *last; @@ -667,12 +671,14 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { } if (node_cmp(&nodes[j], &nodes[i]) < 0 && (real_prev_filtered == NULL - || node_cmp(&nodes[j], real_prev_filtered) > 0)) { 
+ || node_cmp(&nodes[j], real_prev_filtered) + > 0)) { real_prev_filtered = &nodes[j]; } if (node_cmp(&nodes[j], &nodes[i]) > 0 && (real_next_filtered == NULL - || node_cmp(&nodes[j], real_next_filtered) < 0)) { + || node_cmp(&nodes[j], real_next_filtered) + < 0)) { real_next_filtered = &nodes[j]; } } @@ -707,8 +713,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &before, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -721,22 +728,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); /* search, nsearch, psearch from nodes[i] */ - real_search_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : NULL); + real_search_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : NULL); search_filtered = tree_search_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? 
&nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -750,22 +760,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { equiv.magic = NODE_MAGIC; equiv.key = nodes[i].key; equiv.allow_duplicates = true; - real_search_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : NULL); + real_search_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : NULL); search_filtered = tree_search_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -791,8 +804,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? 
&nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &after, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -800,7 +814,7 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { } /* Filtered iteration test setup. */ - int nspecial = 0; + int nspecial = 0; node_t *sorted_nodes[UPDATE_TEST_MAX]; node_t *sorted_filtered_nodes[UPDATE_TEST_MAX]; for (int i = 0; i < nnodes; i++) { @@ -862,8 +876,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_d_eq(j + 1, ctx.ncalls, ""); - expect_ptr_eq(sorted_filtered_nodes[ - nodes[i].filtered_rank + j], iter_result, ""); + expect_ptr_eq( + sorted_filtered_nodes[nodes[i].filtered_rank + j], + iter_result, ""); } } @@ -888,8 +903,8 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_subtree, &specialness); expect_ptr_null(iter_result, ""); int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0); - expect_d_eq(nodes[i].filtered_rank + surplus_rank, ctx.ncalls, - ""); + expect_d_eq( + nodes[i].filtered_rank + surplus_rank, ctx.ncalls, ""); } /* Filtered backward iteration from the end, with stopping */ for (int i = 0; i < nspecial; i++) { @@ -899,15 +914,15 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { iter_result = tree_reverse_iter_filtered(tree, NULL, &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, &specialness_filter_subtree, &specialness); - expect_ptr_eq(sorted_filtered_nodes[nspecial - i - 1], - iter_result, ""); + expect_ptr_eq( + sorted_filtered_nodes[nspecial - i - 1], iter_result, ""); expect_d_eq(ctx.ncalls, i + 1, ""); } /* Filtered backward iteration from a starting point, with stopping. */ for (int i = 0; i < nnodes; i++) { int surplus_rank = (nodes[i].specialness >= 1 ? 
1 : 0); for (int j = 0; j < nodes[i].filtered_rank + surplus_rank; - j++) { + j++) { ctx.ncalls = 0; ctx.last_node = NULL; ctx.ncalls_max = j + 1; @@ -916,16 +931,16 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_d_eq(j + 1, ctx.ncalls, ""); - expect_ptr_eq(sorted_filtered_nodes[ - nodes[i].filtered_rank - j - 1 + surplus_rank], + expect_ptr_eq( + sorted_filtered_nodes[nodes[i].filtered_rank - j - 1 + + surplus_rank], iter_result, ""); } } } static void -do_update_search_test(int nnodes, int ntrees, int nremovals, - int nupdates) { +do_update_search_test(int nnodes, int ntrees, int nremovals, int nupdates) { node_t nodes[UPDATE_TEST_MAX]; assert(nnodes <= UPDATE_TEST_MAX); @@ -987,8 +1002,8 @@ rb_gen(static UNUSED, unsummarized_tree_, unsummarized_tree_t, node_t, link, node_cmp); static node_t * -unsummarized_tree_iterate_cb(unsummarized_tree_t *tree, node_t *node, - void *data) { +unsummarized_tree_iterate_cb( + unsummarized_tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; (*i)++; return NULL; @@ -1002,18 +1017,14 @@ TEST_BEGIN(test_rb_unsummarized) { unsummarized_tree_t tree; unsummarized_tree_new(&tree); unsigned nnodes = 0; - unsummarized_tree_iter(&tree, NULL, &unsummarized_tree_iterate_cb, - &nnodes); + unsummarized_tree_iter( + &tree, NULL, &unsummarized_tree_iterate_cb, &nnodes); expect_u_eq(0, nnodes, ""); } TEST_END int main(void) { - return test_no_reentrancy( - test_rb_empty, - test_rb_random, - test_rb_filter_simple, - test_rb_update_search, - test_rb_unsummarized); + return test_no_reentrancy(test_rb_empty, test_rb_random, + test_rb_filter_simple, test_rb_update_search, test_rb_unsummarized); } diff --git a/test/unit/retained.c b/test/unit/retained.c index 40cbb0cd..687701c7 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -3,21 +3,22 @@ #include "jemalloc/internal/san.h" #include 
"jemalloc/internal/spin.h" -static unsigned arena_ind; -static size_t sz; -static size_t esz; -#define NEPOCHS 8 -#define PER_THD_NALLOCS 1 -static atomic_u_t epoch; -static atomic_u_t nfinished; +static unsigned arena_ind; +static size_t sz; +static size_t esz; +#define NEPOCHS 8 +#define PER_THD_NALLOCS 1 +static atomic_u_t epoch; +static atomic_u_t nfinished; static unsigned do_arena_create(extent_hooks_t *h) { unsigned new_arena_ind; - size_t ind_sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, - (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, - "Unexpected mallctl() failure"); + size_t ind_sz = sizeof(unsigned); + expect_d_eq( + mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), + 0, "Unexpected mallctl() failure"); return new_arena_ind; } @@ -26,7 +27,7 @@ do_arena_destroy(unsigned ind) { size_t mib[3]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)ind; @@ -38,7 +39,8 @@ static void do_refresh(void) { uint64_t refresh_epoch = 1; expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&refresh_epoch, - sizeof(refresh_epoch)), 0, "Unexpected mallctl() failure"); + sizeof(refresh_epoch)), + 0, "Unexpected mallctl() failure"); } static size_t @@ -47,12 +49,12 @@ do_get_size_impl(const char *cmd, unsigned ind) { size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; size_t size; - expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\"], ...) 
failure", cmd); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); return size; } @@ -72,9 +74,9 @@ thd_start(void *arg) { for (unsigned next_epoch = 1; next_epoch < NEPOCHS; next_epoch++) { /* Busy-wait for next epoch. */ unsigned cur_epoch; - spin_t spinner = SPIN_INITIALIZER; - while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) != - next_epoch) { + spin_t spinner = SPIN_INITIALIZER; + while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) + != next_epoch) { spin_adaptive(&spinner); } expect_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); @@ -84,11 +86,10 @@ thd_start(void *arg) { * no need to deallocate. */ for (unsigned i = 0; i < PER_THD_NALLOCS; i++) { - void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE - ); - expect_ptr_not_null(p, - "Unexpected mallocx() failure\n"); + void *p = mallocx( + sz, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null( + p, "Unexpected mallocx() failure\n"); } /* Let the main thread know we've finished this iteration. 
*/ @@ -142,17 +143,17 @@ TEST_BEGIN(test_retained) { */ do_refresh(); - size_t allocated = (esz - guard_sz) * nthreads * - PER_THD_NALLOCS; + size_t allocated = (esz - guard_sz) * nthreads + * PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); expect_zu_le(active, mapped, "Unexpected mapped memory"); arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); - size_t usable = 0; - for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.exp_grow.next; pind++) { + size_t usable = 0; + for (pszind_t pind = sz_psz2ind(HUGEPAGE); + pind < arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; @@ -162,8 +163,8 @@ TEST_BEGIN(test_retained) { if (psz_usable > 0) { expect_zu_lt(usable, allocated, "Excessive retained memory " - "(%#zx[+%#zx] > %#zx)", usable, psz_usable, - allocated); + "(%#zx[+%#zx] > %#zx)", + usable, psz_usable, allocated); usable += psz_usable; } } @@ -174,8 +175,8 @@ TEST_BEGIN(test_retained) { * (rather than retaining) during reset. 
*/ do_arena_destroy(arena_ind); - expect_u_eq(do_arena_create(NULL), arena_ind, - "Unexpected arena index"); + expect_u_eq( + do_arena_create(NULL), arena_ind, "Unexpected arena index"); } for (unsigned i = 0; i < nthreads; i++) { @@ -188,6 +189,5 @@ TEST_END int main(void) { - return test( - test_retained); + return test(test_retained); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 4101b72b..284c3eae 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -16,14 +16,15 @@ TEST_BEGIN(test_rtree_read_empty) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); rtree_contents_t contents; - expect_true(rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, - &contents), "rtree_read_independent() should fail on empty rtree."); + expect_true( + rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, &contents), + "rtree_read_independent() should fail on empty rtree."); base_delete(tsdn, base); } @@ -45,9 +46,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t *edata_a, *edata_b; edata_a = alloc_edata(); edata_b = alloc_edata(); - edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, - false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, + sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, + false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); edata_init(edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); @@ -57,11 +58,11 @@ TEST_BEGIN(test_rtree_extrema) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected 
base_new failure"); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); rtree_contents_t contents_a; contents_a.edata = edata_a; @@ -73,13 +74,14 @@ TEST_BEGIN(test_rtree_extrema) { "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); - rtree_contents_t read_contents_a = rtree_read(tsdn, rtree, &rtree_ctx, - PAGE); + rtree_contents_t read_contents_a = rtree_read( + tsdn, rtree, &rtree_ctx, PAGE); expect_true(contents_a.edata == read_contents_a.edata - && contents_a.metadata.szind == read_contents_a.metadata.szind - && contents_a.metadata.slab == read_contents_a.metadata.slab - && contents_a.metadata.is_head == read_contents_a.metadata.is_head - && contents_a.metadata.state == read_contents_a.metadata.state, + && contents_a.metadata.szind == read_contents_a.metadata.szind + && contents_a.metadata.slab == read_contents_a.metadata.slab + && contents_a.metadata.is_head + == read_contents_a.metadata.is_head + && contents_a.metadata.state == read_contents_a.metadata.state, "rtree_read() should return previously set value"); rtree_contents_t contents_b; @@ -88,15 +90,17 @@ TEST_BEGIN(test_rtree_extrema) { contents_b.metadata.slab = edata_slab_get(edata_b); contents_b.metadata.is_head = edata_is_head_get(edata_b); contents_b.metadata.state = edata_state_get(edata_b); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), - contents_b), "Unexpected rtree_write() failure"); - rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0)); + expect_false( + rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), + "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_b = rtree_read( + tsdn, 
rtree, &rtree_ctx, ~((uintptr_t)0)); assert_true(contents_b.edata == read_contents_b.edata - && contents_b.metadata.szind == read_contents_b.metadata.szind - && contents_b.metadata.slab == read_contents_b.metadata.slab - && contents_b.metadata.is_head == read_contents_b.metadata.is_head - && contents_b.metadata.state == read_contents_b.metadata.state, + && contents_b.metadata.szind == read_contents_b.metadata.szind + && contents_b.metadata.slab == read_contents_b.metadata.slab + && contents_b.metadata.is_head + == read_contents_b.metadata.is_head + && contents_b.metadata.state == read_contents_b.metadata.state, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -109,19 +113,19 @@ TEST_BEGIN(test_rtree_bits) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - uintptr_t keys[] = {PAGE, PAGE + 1, - PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; + uintptr_t keys[] = { + PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; edata_t *edata_c = alloc_edata(); edata_init(edata_c, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { + for (unsigned i = 0; i < sizeof(keys) / sizeof(uintptr_t); i++) { rtree_contents_t contents; contents.edata = edata_c; contents.metadata.szind = SC_NSIZES; @@ -129,18 +133,22 @@ TEST_BEGIN(test_rtree_bits) { contents.metadata.is_head = false; contents.metadata.state = extent_state_active; - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - contents), "Unexpected rtree_write() failure"); - for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - 
expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[j]).edata, edata_c, + expect_false( + rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), + "Unexpected rtree_write() failure"); + for (unsigned j = 0; j < sizeof(keys) / sizeof(uintptr_t); + j++) { + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[j]).edata, + edata_c, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " - "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, - j, keys[i], keys[j]); + "j=%u, set key=%#" FMTxPTR ", get key=%#" FMTxPTR, + i, j, keys[i], keys[j]); } expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - (((uintptr_t)2) << LG_PAGE)).edata, + (((uintptr_t)2) << LG_PAGE)) + .edata, "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } @@ -159,8 +167,8 @@ TEST_BEGIN(test_rtree_random) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - uintptr_t keys[NSET]; - rtree_t *rtree = &test_rtree; + uintptr_t keys[NSET]; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); @@ -168,15 +176,15 @@ TEST_BEGIN(test_rtree_random) { edata_init(edata_d, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, - &rtree_ctx, keys[i], false, true); - expect_ptr_not_null(elm, - "Unexpected rtree_leaf_elm_lookup() failure"); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup( + tsdn, rtree, &rtree_ctx, keys[i], false, true); + expect_ptr_not_null( + elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_contents_t contents; contents.edata = edata_d; 
contents.metadata.szind = SC_NSIZES; @@ -184,26 +192,27 @@ TEST_BEGIN(test_rtree_random) { contents.metadata.is_head = false; contents.metadata.state = edata_state_get(edata_d); rtree_leaf_elm_write(tsdn, rtree, elm, contents); - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, edata_d, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, edata_d, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value, " - "i=%u", i); + "i=%u", + i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, - "rtree_edata_read() should return previously set value"); + expect_ptr_null( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, + expect_ptr_null( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, "rtree_edata_read() should return previously set value"); } @@ -215,8 +224,8 @@ TEST_BEGIN(test_rtree_random) { TEST_END static void -test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, - uintptr_t end) { +test_rtree_range_write( + tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, uintptr_t end) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); @@ -230,15 +239,17 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, contents.metadata.is_head = false; contents.metadata.state = extent_state_active; - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, - contents), "Unexpected rtree_write() failure"); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, - contents), 
"Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, contents), + "Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, contents), + "Unexpected rtree_write() failure"); rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents); for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - start + (i << LG_PAGE)).edata, edata_e, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, start + (i << LG_PAGE)) + .edata, + edata_e, "rtree_edata_read() should return previously set value"); } rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end); @@ -247,8 +258,9 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, start + (i << LG_PAGE), false, false); expect_ptr_not_null(elm, "Should have been initialized."); - expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm, - false).edata, "Should have been cleared."); + expect_ptr_null( + rtree_leaf_elm_read(tsdn, rtree, elm, false).edata, + "Should have been cleared."); } } @@ -259,8 +271,8 @@ TEST_BEGIN(test_rtree_range) { expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); /* Not crossing rtree node boundary first. 
*/ uintptr_t start = ZU(1) << rtree_leaf_maskbits(); @@ -280,10 +292,6 @@ TEST_END int main(void) { - return test( - test_rtree_read_empty, - test_rtree_extrema, - test_rtree_bits, - test_rtree_random, - test_rtree_range); + return test(test_rtree_read_empty, test_rtree_extrema, test_rtree_bits, + test_rtree_random, test_rtree_range); } diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c index 84726675..558797c0 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -8,7 +8,8 @@ */ bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -26,7 +27,7 @@ TEST_BEGIN(test_malloc_free_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); free(ptr); safety_check_set_abort(NULL); @@ -42,7 +43,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = mallocx(128, 0); + char *ptr = mallocx(128, 0); buffer_overflow_write(ptr, 128); dallocx(ptr, 0); safety_check_set_abort(NULL); @@ -58,7 +59,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); sdallocx(ptr, 128, 0); safety_check_set_abort(NULL); @@ -74,7 +75,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); ptr = realloc(ptr, 129); safety_check_set_abort(NULL); @@ -91,7 +92,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! 
*/ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); ptr = rallocx(ptr, 129, 0); safety_check_set_abort(NULL); @@ -108,7 +109,7 @@ TEST_BEGIN(test_xallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); size_t result = xallocx(ptr, 129, 0, 0); expect_zu_eq(result, 128, ""); @@ -120,7 +121,7 @@ TEST_BEGIN(test_xallocx_overflow) { TEST_END TEST_BEGIN(test_realloc_no_overflow) { - char* ptr = malloc(128); + char *ptr = malloc(128); ptr = realloc(ptr, 256); ptr[128] = 0; ptr[255] = 0; @@ -135,7 +136,7 @@ TEST_BEGIN(test_realloc_no_overflow) { TEST_END TEST_BEGIN(test_rallocx_no_overflow) { - char* ptr = malloc(128); + char *ptr = malloc(128); ptr = rallocx(ptr, 256, 0); ptr[128] = 0; ptr[255] = 0; @@ -151,13 +152,8 @@ TEST_END int main(void) { - return test( - test_malloc_free_overflow, - test_mallocx_dallocx_overflow, - test_malloc_sdallocx_overflow, - test_realloc_overflow, - test_rallocx_overflow, - test_xallocx_overflow, - test_realloc_no_overflow, - test_rallocx_no_overflow); + return test(test_malloc_free_overflow, test_mallocx_dallocx_overflow, + test_malloc_sdallocx_overflow, test_realloc_overflow, + test_rallocx_overflow, test_xallocx_overflow, + test_realloc_no_overflow, test_rallocx_no_overflow); } diff --git a/test/unit/san.c b/test/unit/san.c index 5b98f52e..2c7f1ec5 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -6,8 +6,8 @@ static void verify_extent_guarded(tsdn_t *tsdn, void *ptr) { - expect_true(extent_is_guarded(tsdn, ptr), - "All extents should be guarded."); + expect_true( + extent_is_guarded(tsdn, ptr), "All extents should be guarded."); } #define MAX_SMALL_ALLOCATIONS 4096 @@ -21,13 +21,13 @@ void *small_alloc[MAX_SMALL_ALLOCATIONS]; TEST_BEGIN(test_guarded_small) { test_skip_if(opt_prof); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned npages = 16, 
pages_found = 0, ends_found = 0; VARIABLE_ARRAY(uintptr_t, pages, npages); /* Allocate to get sanitized pointers. */ - size_t slab_sz = PAGE; - size_t sz = slab_sz / 8; + size_t slab_sz = PAGE; + size_t sz = slab_sz / 8; unsigned n_alloc = 0; while (n_alloc < MAX_SMALL_ALLOCATIONS) { void *ptr = malloc(sz); @@ -54,8 +54,9 @@ TEST_BEGIN(test_guarded_small) { /* Verify the pages are not continuous, i.e. separated by guards. */ for (unsigned i = 0; i < npages - 1; i++) { for (unsigned j = i + 1; j < npages; j++) { - uintptr_t ptr_diff = pages[i] > pages[j] ? - pages[i] - pages[j] : pages[j] - pages[i]; + uintptr_t ptr_diff = pages[i] > pages[j] + ? pages[i] - pages[j] + : pages[j] - pages[i]; expect_zu_ge((size_t)ptr_diff, slab_sz + PAGE, "There should be at least one pages between " "guarded slabs"); @@ -69,7 +70,7 @@ TEST_BEGIN(test_guarded_small) { TEST_END TEST_BEGIN(test_guarded_large) { - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned nlarge = 32; VARIABLE_ARRAY(uintptr_t, large, nlarge); @@ -85,8 +86,9 @@ TEST_BEGIN(test_guarded_large) { /* Verify the pages are not continuous, i.e. separated by guards. */ for (unsigned i = 0; i < nlarge; i++) { for (unsigned j = i + 1; j < nlarge; j++) { - uintptr_t ptr_diff = large[i] > large[j] ? - large[i] - large[j] : large[j] - large[i]; + uintptr_t ptr_diff = large[i] > large[j] + ? 
large[i] - large[j] + : large[j] - large[i]; expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, "There should be at least two pages between " " guarded large allocations"); @@ -102,15 +104,13 @@ TEST_END static void verify_pdirty(unsigned arena_ind, uint64_t expected) { uint64_t pdirty = get_arena_pdirty(arena_ind); - expect_u64_eq(pdirty, expected / PAGE, - "Unexpected dirty page amount."); + expect_u64_eq(pdirty, expected / PAGE, "Unexpected dirty page amount."); } static void verify_pmuzzy(unsigned arena_ind, uint64_t expected) { uint64_t pmuzzy = get_arena_pmuzzy(arena_ind); - expect_u64_eq(pmuzzy, expected / PAGE, - "Unexpected muzzy page amount."); + expect_u64_eq(pmuzzy, expected / PAGE, "Unexpected muzzy page amount."); } TEST_BEGIN(test_guarded_decay) { @@ -140,7 +140,7 @@ TEST_BEGIN(test_guarded_decay) { verify_pmuzzy(arena_ind, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Should reuse dirty extents for the two mallocx. 
*/ void *p1 = do_mallocx(sz1, flags); @@ -200,8 +200,5 @@ TEST_END int main(void) { - return test( - test_guarded_small, - test_guarded_large, - test_guarded_decay); + return test(test_guarded_small, test_guarded_large, test_guarded_decay); } diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index cafa37fe..9aa0210e 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -16,12 +16,12 @@ TEST_BEGIN(test_san_bump_alloc) { assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); arena_t *arena = arena_get(tsdn, arena_ind, false); - pac_t *pac = &arena->pa_shard.pac; + pac_t *pac = &arena->pa_shard.pac; - size_t alloc_size = PAGE * 16; - size_t alloc_n = alloc_size / sizeof(unsigned); - edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size, /* zero */ false); + size_t alloc_size = PAGE * 16; + size_t alloc_n = alloc_size / sizeof(unsigned); + edata_t *edata = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false); expect_ptr_not_null(edata, "Failed to allocate edata"); expect_u_eq(edata_arena_ind_get(edata), arena_ind, @@ -39,10 +39,10 @@ TEST_BEGIN(test_san_bump_alloc) { ((unsigned *)ptr)[i] = 1; } - size_t alloc_size2 = PAGE * 28; - size_t alloc_n2 = alloc_size / sizeof(unsigned); - edata_t *edata2 = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size2, /* zero */ true); + size_t alloc_size2 = PAGE * 28; + size_t alloc_n2 = alloc_size / sizeof(unsigned); + edata_t *edata2 = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size2, /* zero */ true); expect_ptr_not_null(edata2, "Failed to allocate edata"); expect_u_eq(edata_arena_ind_get(edata2), arena_ind, @@ -57,11 +57,11 @@ TEST_BEGIN(test_san_bump_alloc) { expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); uintptr_t ptrdiff = ptr2 > ptr ? 
(uintptr_t)ptr2 - (uintptr_t)ptr - : (uintptr_t)ptr - (uintptr_t)ptr2; - size_t between_allocs = (size_t)ptrdiff - alloc_size; + : (uintptr_t)ptr - (uintptr_t)ptr2; + size_t between_allocs = (size_t)ptrdiff - alloc_size; - expect_zu_ge(between_allocs, PAGE, - "Guard page between allocs is missing"); + expect_zu_ge( + between_allocs, PAGE, "Guard page between allocs is missing"); for (unsigned i = 0; i < alloc_n2; ++i) { expect_u_eq(((unsigned *)ptr2)[i], 0, "Memory is not zeroed"); @@ -81,11 +81,11 @@ TEST_BEGIN(test_large_alloc_size) { assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); arena_t *arena = arena_get(tsdn, arena_ind, false); - pac_t *pac = &arena->pa_shard.pac; + pac_t *pac = &arena->pa_shard.pac; - size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; - edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size, /* zero */ false); + size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; + edata_t *edata = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false); expect_u_eq(edata_arena_ind_get(edata), arena_ind, "Edata was assigned an incorrect arena id"); expect_zu_eq(edata_size_get(edata), alloc_size, @@ -105,7 +105,5 @@ TEST_END int main(void) { - return test( - test_san_bump_alloc, - test_large_alloc_size); + return test(test_san_bump_alloc, test_large_alloc_size); } diff --git a/test/unit/sc.c b/test/unit/sc.c index d207481c..725ede0e 100644 --- a/test/unit/sc.c +++ b/test/unit/sc.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_update_slab_size) { sc_data_t data; memset(&data, 0, sizeof(data)); sc_data_init(&data); - sc_t *tiny = &data.sc[0]; + sc_t *tiny = &data.sc[0]; size_t tiny_size = (ZU(1) << tiny->lg_base) + (ZU(tiny->ndelta) << tiny->lg_delta); size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1; @@ -13,14 +13,14 @@ TEST_BEGIN(test_update_slab_size) { sc_data_update_slab_size(&data, 1, 10 * PAGE, 1); for (int i = 0; i < data.nbins; i++) { - sc_t *sc = &data.sc[i]; + sc_t *sc = 
&data.sc[i]; size_t reg_size = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); if (reg_size <= PAGE) { expect_d_eq(sc->pgs, 1, "Ignored valid page size hint"); } else { - expect_d_gt(sc->pgs, 1, - "Allowed invalid page size hint"); + expect_d_gt( + sc->pgs, 1, "Allowed invalid page size hint"); } } } @@ -28,6 +28,5 @@ TEST_END int main(void) { - return test( - test_update_slab_size); + return test(test_update_slab_size); } diff --git a/test/unit/sec.c b/test/unit/sec.c index cfef043f..d57c66ec 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -4,8 +4,8 @@ typedef struct pai_test_allocator_s pai_test_allocator_t; struct pai_test_allocator_s { - pai_t pai; - bool alloc_fail; + pai_t pai; + bool alloc_fail; size_t alloc_count; size_t alloc_batch_count; size_t dalloc_count; @@ -17,10 +17,10 @@ struct pai_test_allocator_s { * pointers it gets back; this is mostly just helpful for debugging. */ uintptr_t next_ptr; - size_t expand_count; - bool expand_return_value; - size_t shrink_count; - bool shrink_return_value; + size_t expand_count; + bool expand_return_value; + size_t shrink_count; + bool shrink_return_value; }; static void @@ -82,8 +82,7 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, for (size_t i = 0; i < nallocs; i++) { edata_t *edata = malloc(sizeof(edata_t)); assert_ptr_not_null(edata, ""); - edata_init(edata, /* arena_ind */ 0, - (void *)ta->next_ptr, size, + edata_init(edata, /* arena_ind */ 0, (void *)ta->next_ptr, size, /* slab */ false, /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ false, /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); @@ -112,8 +111,8 @@ pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, } static void -pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +pai_test_allocator_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t 
*)self; ta->dalloc_count++; free(edata); @@ -174,7 +173,7 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { @@ -189,26 +188,24 @@ TEST_BEGIN(test_reuse) { } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le(2 * NALLOCS, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + 2 * NALLOCS, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Free in a different order than we allocated, to make sure free-list * separation works correctly. */ for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, one_page[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, one_page[i], &deferred_work_generated); } for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, two_page[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, two_page[i], &deferred_work_generated); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Check that the n'th most recent deallocated extent is returned for * the n'th alloc request of a given size. 
@@ -220,19 +217,15 @@ TEST_BEGIN(test_reuse) { edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_eq(one_page[i], alloc1, - "Got unexpected allocation"); - expect_ptr_eq(two_page[i], alloc2, - "Got unexpected allocation"); + expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); + expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); } TEST_END - TEST_BEGIN(test_auto_flush) { pai_test_allocator_t ta; pai_test_allocator_init(&ta); @@ -251,7 +244,7 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -265,18 +258,16 @@ TEST_BEGIN(test_auto_flush) { &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le(NALLOCS + 1, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + NALLOCS + 1, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* Free until the SEC is full, but should not have flushed yet. 
*/ for (int i = 0; i < NALLOCS; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } - expect_zu_le(NALLOCS + 1, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + NALLOCS + 1, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Free the extra allocation; this should trigger a flush. The internal * flushing logic is allowed to get complicated; for now, we rely on our @@ -308,7 +299,7 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -324,8 +315,7 @@ do_disable_flush_test(bool is_disable) { size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); if (is_disable) { sec_disable(tsdn, &sec); @@ -345,8 +335,8 @@ do_disable_flush_test(bool is_disable) { * If we free into a disabled SEC, it should forward to the fallback. * Otherwise, the SEC should accept the allocation. 
*/ - pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, allocs[NALLOCS - 1], &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -382,18 +372,18 @@ TEST_BEGIN(test_max_alloc_respected) { /* max_bytes */ 1000 * PAGE); for (size_t i = 0; i < 100; i++) { - expect_zu_eq(i, ta.alloc_count, - "Incorrect number of allocations"); - expect_zu_eq(i, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq( + i, ta.alloc_count, "Incorrect number of allocations"); + expect_zu_eq( + i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); - expect_zu_eq(i + 1, ta.alloc_count, - "Incorrect number of allocations"); - expect_zu_eq(i, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq( + i + 1, ta.alloc_count, "Incorrect number of allocations"); + expect_zu_eq( + i, ta.dalloc_count, "Incorrect number of deallocations"); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); } } @@ -435,8 +425,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { expect_false(err, "Unexpected shrink failure"); expect_zu_eq(1, ta.shrink_count, ""); ta.shrink_return_value = true; - err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE, - &deferred_work_generated); + err = pai_shrink( + tsdn, &sec.pai, edata, 2 * PAGE, PAGE, &deferred_work_generated); expect_true(err, "Unexpected shrink success"); expect_zu_eq(2, ta.shrink_count, ""); } @@ -455,7 +445,7 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, 
/* frequent_reuse */ false, &deferred_work_generated); @@ -570,8 +560,9 @@ TEST_BEGIN(test_stats_auto_flush) { pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count - - ta.dalloc_count - ta.dalloc_batch_count); + expect_stats_pages(tsdn, &sec, + ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count + - ta.dalloc_batch_count); } TEST_END @@ -590,7 +581,7 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, @@ -621,15 +612,8 @@ TEST_END int main(void) { - return test( - test_reuse, - test_auto_flush, - test_disable, - test_flush, - test_max_alloc_respected, - test_expand_shrink_delegate, - test_nshards_0, - test_stats_simple, - test_stats_auto_flush, + return test(test_reuse, test_auto_flush, test_disable, test_flush, + test_max_alloc_respected, test_expand_shrink_delegate, + test_nshards_0, test_stats_simple, test_stats_auto_flush, test_stats_manual_flush); } diff --git a/test/unit/seq.c b/test/unit/seq.c index 06ed6834..ca6c74b1 100644 --- a/test/unit/seq.c +++ b/test/unit/seq.c @@ -24,7 +24,7 @@ expect_data(data_t *data) { seq_define(data_t, data) -typedef struct thd_data_s thd_data_t; + typedef struct thd_data_s thd_data_t; struct thd_data_s { seq_data_t data; }; @@ -32,8 +32,8 @@ struct thd_data_s { static void * seq_reader_thd(void *arg) { thd_data_t *thd_data = (thd_data_t *)arg; - int iter = 0; - data_t local_data; + int iter = 0; + data_t local_data; while (iter < 1000 * 1000 - 1) { bool success = seq_try_load_data(&local_data, &thd_data->data); if (success) { @@ -49,7 +49,7 @@ seq_reader_thd(void *arg) { static void * seq_writer_thd(void *arg) { thd_data_t *thd_data = (thd_data_t 
*)arg; - data_t local_data; + data_t local_data; memset(&local_data, 0, sizeof(local_data)); for (int i = 0; i < 1000 * 1000; i++) { set_data(&local_data, i); @@ -74,7 +74,7 @@ TEST_BEGIN(test_seq_threaded) { TEST_END TEST_BEGIN(test_seq_simple) { - data_t data; + data_t data; seq_data_t seq; memset(&seq, 0, sizeof(seq)); for (int i = 0; i < 1000 * 1000; i++) { @@ -88,8 +88,7 @@ TEST_BEGIN(test_seq_simple) { } TEST_END -int main(void) { - return test_no_reentrancy( - test_seq_simple, - test_seq_threaded); +int +main(void) { + return test_no_reentrancy(test_seq_simple, test_seq_threaded); } diff --git a/test/unit/size_check.c b/test/unit/size_check.c index 3cb3bc9c..a31578bf 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -3,7 +3,8 @@ #include "jemalloc/internal/safety_check.h" bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -72,8 +73,7 @@ TEST_END int main(void) { - return test( - test_invalid_size_sdallocx, + return test(test_invalid_size_sdallocx, test_invalid_size_sdallocx_nonzero_flag, test_invalid_size_sdallocx_noflags); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index c373829c..5379047c 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -3,12 +3,13 @@ static size_t get_max_size_class(void) { unsigned nlextents; - size_t mib[4]; - size_t sz, miblen, max_size_class; + size_t mib[4]; + size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -16,30 +17,34 @@ get_max_size_class(void) { mib[2] = nlextents - 1; sz = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); return max_size_class; } TEST_BEGIN(test_size_classes) { - size_t size_class, max_size_class; + size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = sz_large_size_classes_disabled()? SC_SMALL_MAXCLASS: - get_max_size_class(); + max_size_class = sz_large_size_classes_disabled() + ? SC_SMALL_MAXCLASS + : get_max_size_class(); max_index = sz_size2index(max_size_class); - for (index = 0, size_class = sz_index2size(index); index < max_index || - size_class < max_size_class; index++, size_class = - sz_index2size(index)) { + for (index = 0, size_class = sz_index2size(index); + index < max_index || size_class < max_size_class; + index++, size_class = sz_index2size(index)) { gen_index = sz_size2index(size_class); expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " - "size_class=%zu (%#zx)", index, size_class, size_class); + "size_class=%zu (%#zx)", + index, size_class, size_class); expect_true(size_class < max_size_class, "Loop conditionals should be equivalent; index=%u, " - "size_class=%zu (%#zx)", index, size_class, size_class); + "size_class=%zu (%#zx)", + index, size_class, size_class); expect_u_eq(index, gen_index, "sz_size2index() does not reverse sz_index2size(): index=%u" @@ -51,29 +56,30 @@ TEST_BEGIN(test_size_classes) { " --> size_class=%zu --> index=%u --> size_class=%zu", index, size_class, gen_index, sz_index2size(gen_index)); - expect_u_eq(index+1, sz_size2index(size_class+1), + expect_u_eq(index + 1, sz_size2index(size_class + 1), "Next size_class does not round up properly"); - expect_zu_eq(size_class, (index > 0) ? - sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1), + expect_zu_eq(size_class, + (index > 0) ? 
sz_s2u(sz_index2size(index - 1) + 1) + : sz_s2u(1), "sz_s2u() does not round up to size class"); - expect_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class - 1), "sz_s2u() does not round up to size class"); expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); - expect_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), + expect_zu_eq(sz_s2u(size_class + 1), sz_index2size(index + 1), "sz_s2u() does not round up to next size class"); } expect_u_eq(index, sz_size2index(sz_index2size(index)), "sz_size2index() does not reverse sz_index2size()"); - expect_zu_eq(max_size_class, sz_index2size( - sz_size2index(max_size_class)), + expect_zu_eq(max_size_class, + sz_index2size(sz_size2index(max_size_class)), "sz_index2size() does not reverse sz_size2index()"); - expect_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), + expect_zu_eq(size_class, sz_s2u(sz_index2size(index - 1) + 1), "sz_s2u() does not round up to size class"); - expect_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class - 1), "sz_s2u() does not round up to size class"); expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); @@ -115,31 +121,33 @@ TEST_BEGIN(test_grow_slow_size_classes) { TEST_END TEST_BEGIN(test_psize_classes) { - size_t size_class, max_psz; + size_t size_class, max_psz; pszind_t pind, max_pind; max_psz = get_max_size_class() + PAGE; max_pind = sz_psz2ind(max_psz); for (pind = 0, size_class = sz_pind2sz(pind); - pind < max_pind || size_class < max_psz; - pind++, size_class = sz_pind2sz(pind)) { + pind < max_pind || size_class < max_psz; + pind++, size_class = sz_pind2sz(pind)) { expect_true(pind < max_pind, "Loop conditionals should be equivalent; pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); + "size_class=%zu (%#zx)", + pind, size_class, size_class); expect_true(size_class < max_psz, "Loop conditionals should be equivalent; 
pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); + "size_class=%zu (%#zx)", + pind, size_class, size_class); expect_u_eq(pind, sz_psz2ind(size_class), "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, sz_psz2ind(size_class), + " size_class=%zu --> pind=%u --> size_class=%zu", + pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); expect_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, sz_psz2ind(size_class), + " size_class=%zu --> pind=%u --> size_class=%zu", + pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); if (size_class == SC_LARGE_MAXCLASS) { @@ -150,14 +158,15 @@ TEST_BEGIN(test_psize_classes) { "Next size_class does not round up properly"); } - expect_zu_eq(size_class, (pind > 0) ? - sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), + expect_zu_eq(size_class, + (pind > 0) ? 
sz_psz2u(sz_pind2sz(pind - 1) + 1) + : sz_psz2u(1), "sz_psz2u() does not round up to size class"); - expect_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class - 1), "sz_psz2u() does not round up to size class"); expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); - expect_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), + expect_zu_eq(sz_psz2u(size_class + 1), sz_pind2sz(pind + 1), "sz_psz2u() does not round up to next size class"); } @@ -166,9 +175,9 @@ TEST_BEGIN(test_psize_classes) { expect_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), "sz_pind2sz() does not reverse sz_psz2ind()"); - expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), + expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind - 1) + 1), "sz_psz2u() does not round up to size class"); - expect_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class - 1), "sz_psz2u() does not round up to size class"); expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); @@ -181,31 +190,31 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - expect_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, + expect_u_eq(sz_size2index(max_size_class + 1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, + expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX) + 1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); expect_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - expect_zu_eq(sz_s2u(max_size_class+1), 0, + expect_zu_eq(sz_s2u(max_size_class + 1), 0, "sz_s2u() should return 0 for unsupported size"); - expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, + expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX) + 1), 0, "sz_s2u() should return 0 for unsupported size"); - expect_zu_eq(sz_s2u(SIZE_T_MAX), 0, - "sz_s2u() should 
return 0 on overflow"); + expect_zu_eq( + sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - expect_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(max_size_class + 1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX) + 1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); expect_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - expect_zu_eq(sz_psz2u(max_size_class+1), max_psz, + expect_zu_eq(sz_psz2u(max_size_class + 1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); - expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, + expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX) + 1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " "size"); expect_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, @@ -215,9 +224,6 @@ TEST_END int main(void) { - return test( - test_size_classes, - test_grow_slow_size_classes, - test_psize_classes, - test_overflow); + return test(test_size_classes, test_grow_slow_size_classes, + test_psize_classes, test_overflow); } diff --git a/test/unit/slab.c b/test/unit/slab.c index 70fc5c7d..5c48e762 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -6,23 +6,22 @@ TEST_BEGIN(test_arena_slab_regind) { szind_t binind; for (binind = 0; binind < SC_NBINS; binind++) { - size_t regind; - edata_t slab; + size_t regind; + edata_t slab; const bin_info_t *bin_info = &bin_infos[binind]; edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), - bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, EXTENT_PAI_PAC, - EXTENT_NOT_HEAD); - expect_ptr_not_null(edata_addr_get(&slab), - "Unexpected malloc() failure"); + bin_info->slab_size, true, binind, 0, extent_state_active, + false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + expect_ptr_not_null( + 
edata_addr_get(&slab), "Unexpected malloc() failure"); arena_dalloc_bin_locked_info_t dalloc_info; arena_dalloc_bin_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { - void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + - (bin_info->reg_size * regind)); - expect_zu_eq(arena_slab_regind(&dalloc_info, binind, - &slab, reg), + void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + + (bin_info->reg_size * regind)); + expect_zu_eq( + arena_slab_regind(&dalloc_info, binind, &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); @@ -34,6 +33,5 @@ TEST_END int main(void) { - return test( - test_arena_slab_regind); + return test(test_arena_slab_regind); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 588c9f44..3686ca74 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -1,9 +1,8 @@ #include "test/jemalloc_test.h" static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP +#define STEP(step, h, x, y) h, + SMOOTHSTEP #undef STEP }; @@ -23,14 +22,14 @@ TEST_BEGIN(test_smoothstep_integral) { sum += smoothstep_tab[i]; } - max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); + max = (KQU(1) << (SMOOTHSTEP_BFP - 1)) * (SMOOTHSTEP_NSTEPS + 1); min = max - SMOOTHSTEP_NSTEPS; - expect_u64_ge(sum, min, - "Integral too small, even accounting for truncation"); + expect_u64_ge( + sum, min, "Integral too small, even accounting for truncation"); expect_u64_le(sum, max, "Integral exceeds 1/2"); if (false) { - malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", + malloc_printf("%" FMTu64 " ulps under 1/2 (limit %d)\n", max - sum, SMOOTHSTEP_NSTEPS); } } @@ -52,7 +51,7 @@ TEST_BEGIN(test_smoothstep_monotonic) { expect_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); prev_h = h; } - expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS - 1], (KQU(1) << SMOOTHSTEP_BFP), "Last step 
must equal 1"); } TEST_END @@ -74,19 +73,21 @@ TEST_BEGIN(test_smoothstep_slope) { uint64_t delta = h - prev_h; expect_u64_ge(delta, prev_delta, "Slope must monotonically increase in 0.0 <= x <= 0.5, " - "i=%u", i); + "i=%u", + i); prev_h = h; prev_delta = delta; } prev_h = KQU(1) << SMOOTHSTEP_BFP; prev_delta = 0; - for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { + for (i = SMOOTHSTEP_NSTEPS - 1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { uint64_t h = smoothstep_tab[i]; uint64_t delta = prev_h - h; expect_u64_ge(delta, prev_delta, "Slope must monotonically decrease in 0.5 <= x <= 1.0, " - "i=%u", i); + "i=%u", + i); prev_h = h; prev_delta = delta; } @@ -95,8 +96,6 @@ TEST_END int main(void) { - return test( - test_smoothstep_integral, - test_smoothstep_monotonic, + return test(test_smoothstep_integral, test_smoothstep_monotonic, test_smoothstep_slope); } diff --git a/test/unit/spin.c b/test/unit/spin.c index b965f742..6dbd0dd1 100644 --- a/test/unit/spin.c +++ b/test/unit/spin.c @@ -13,6 +13,5 @@ TEST_END int main(void) { - return test( - test_spin); + return test(test_spin); } diff --git a/test/unit/stats.c b/test/unit/stats.c index 584a582f..26516fa8 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -4,13 +4,14 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) TEST_BEGIN(test_stats_summary) { - size_t sz, allocated, active, resident, mapped, - metadata, metadata_edata, metadata_rtree; + size_t sz, allocated, active, resident, mapped, metadata, + metadata_edata, metadata_rtree; int expected = config_stats ? 
0 : ENOENT; sz = sizeof(size_t); - expect_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); + expect_d_eq( + mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), @@ -21,17 +22,19 @@ TEST_BEGIN(test_stats_summary) { expect_d_eq(mallctl("stats.metadata", (void *)&metadata, &sz, NULL, 0), expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.metadata_edata", (void *)&metadata_edata, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.metadata_rtree", (void *)&metadata_rtree, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { expect_zu_le(allocated, active, "allocated should be no larger than active"); - expect_zu_lt(active, resident, - "active should be less than resident"); - expect_zu_lt(active, mapped, - "active should be less than mapped"); + expect_zu_lt( + active, resident, "active should be less than resident"); + expect_zu_lt( + active, mapped, "active should be less than mapped"); expect_zu_le(metadata_edata + metadata_rtree, metadata, "the sum of metadata_edata and metadata_rtree " "should be no larger than metadata"); @@ -40,12 +43,12 @@ TEST_BEGIN(test_stats_summary) { TEST_END TEST_BEGIN(test_stats_large) { - void *p; + void *p; uint64_t epoch; - size_t allocated; + size_t allocated; uint64_t nmalloc, ndalloc, nrequests; - size_t sz; - int expected = config_stats ? 0 : ENOENT; + size_t sz; + int expected = config_stats ? 
0 : ENOENT; p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -55,20 +58,22 @@ TEST_BEGIN(test_stats_large) { sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nrequests, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, - "allocated should be greater than zero"); + expect_zu_gt( + allocated, 0, "allocated should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); expect_u64_le(nmalloc, nrequests, @@ -80,18 +85,17 @@ TEST_BEGIN(test_stats_large) { TEST_END TEST_BEGIN(test_stats_arenas_summary) { - void *little, *large; + void *little, *large; uint64_t epoch; - size_t sz; - int expected = config_stats ? 0 : ENOENT; - size_t mapped; + size_t sz; + int expected = config_stats ? 
0 : ENOENT; + size_t mapped; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); expect_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx((1U << SC_LG_LARGE_MINCLASS), - MALLOCX_ARENA(0)); + large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); expect_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); @@ -106,28 +110,29 @@ TEST_BEGIN(test_stats_arenas_summary) { 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - expect_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); + expect_d_eq( + mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.dirty_npurge", - (void *)&dirty_npurge, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", - (void *)&dirty_nmadvise, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.dirty_purged", - (void *)&dirty_purged, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.muzzy_npurge", - (void *)&muzzy_npurge, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", - (void *)&muzzy_nmadvise, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); 
expect_d_eq(mallctl("stats.arenas.0.muzzy_purged", - (void *)&muzzy_purged, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); if (config_stats) { if (!is_background_thread_enabled() && !opt_hpa) { @@ -156,10 +161,10 @@ no_lazy_lock(void) { } TEST_BEGIN(test_stats_arenas_small) { - void *p; - size_t sz, allocated; + void *p; + size_t sz, allocated; uint64_t epoch, nmalloc, ndalloc, nrequests; - int expected = config_stats ? 0 : ENOENT; + int expected = config_stats ? 0 : ENOENT; no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ @@ -174,26 +179,28 @@ TEST_BEGIN(test_stats_arenas_small) { sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.small.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.small.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nrequests, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, - "allocated should be greater than zero"); - expect_u64_gt(nmalloc, 0, - "nmalloc should be no greater than zero"); + expect_zu_gt( + allocated, 0, "allocated should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be no greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - expect_u64_gt(nrequests, 0, - "nrequests 
should be greater than zero"); + expect_u64_gt( + nrequests, 0, "nrequests should be greater than zero"); } dallocx(p, 0); @@ -201,16 +208,16 @@ TEST_BEGIN(test_stats_arenas_small) { TEST_END TEST_BEGIN(test_stats_arenas_large) { - void *p; - size_t sz, allocated, allocated_before; + void *p; + size_t sz, allocated, allocated_before; uint64_t epoch, nmalloc, ndalloc; - size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; - int expected = config_stats ? 0 : ENOENT; + size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; + int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated_before, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated_before, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); p = mallocx(malloc_size, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -219,21 +226,23 @@ TEST_BEGIN(test_stats_arenas_large) { 0, "Unexpected mallctl() failure"); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { expect_zu_ge(allocated_before, 0, "allocated should be greater than zero"); expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size), "the diff between allocated should be greater than the allocation made"); - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc 
should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -248,11 +257,11 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) { } TEST_BEGIN(test_stats_arenas_bins) { - void *p; - size_t sz, curslabs, curregs, nonfull_slabs; + void *p; + size_t sz, curslabs, curregs, nonfull_slabs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nslabs, nreslabs; - int expected = config_stats ? 0 : ENOENT; + int expected = config_stats ? 0 : ENOENT; /* Make sure allocation below isn't satisfied by tcache. */ expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), @@ -264,8 +273,8 @@ TEST_BEGIN(test_stats_arenas_bins) { 0, "Arena creation failure"); sz = sizeof(arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); p = malloc(bin_infos[0].reg_size); expect_ptr_not_null(p, "Unexpected malloc() failure"); @@ -315,26 +324,25 @@ TEST_BEGIN(test_stats_arenas_bins) { expected, "Unexpected mallctl() result"); if (config_stats) { - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - expect_u64_gt(nrequests, 0, - "nrequests should be greater than zero"); - expect_zu_gt(curregs, 0, - "allocated should be greater than zero"); + expect_u64_gt( + nrequests, 0, "nrequests should be greater than zero"); + expect_zu_gt( + curregs, 0, "allocated should be greater than zero"); if (opt_tcache) { expect_u64_gt(nfills, 0, "At least one fill should have occurred"); expect_u64_gt(nflushes, 0, "At least one flush should have occurred"); } - expect_u64_gt(nslabs, 0, - "At least one slab should have been allocated"); + expect_u64_gt( + nslabs, 0, "At least 
one slab should have been allocated"); expect_zu_gt(curslabs, 0, "At least one slab should be currently allocated"); - expect_zu_eq(nonfull_slabs, 0, - "slabs_nonfull should be empty"); + expect_zu_eq(nonfull_slabs, 0, "slabs_nonfull should be empty"); } dallocx(p, 0); @@ -342,14 +350,15 @@ TEST_BEGIN(test_stats_arenas_bins) { TEST_END TEST_BEGIN(test_stats_arenas_lextents) { - void *p; + void *p; uint64_t epoch, nmalloc, ndalloc; - size_t curlextents, sz, hsize; - int expected = config_stats ? 0 : ENOENT; + size_t curlextents, sz, hsize; + int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); p = mallocx(hsize, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -359,19 +368,19 @@ TEST_BEGIN(test_stats_arenas_lextents) { sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", - (void *)&nmalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nmalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", - (void *)&ndalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&ndalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", - (void *)&curlextents, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&curlextents, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); expect_u64_gt(curlextents, 0, @@ -385,35 +394,37 @@ 
TEST_END static void test_tcache_bytes_for_usize(size_t usize) { uint64_t epoch; - size_t tcache_bytes, tcache_stashed_bytes; - size_t sz = sizeof(tcache_bytes); + size_t tcache_bytes, tcache_stashed_bytes; + size_t sz = sizeof(tcache_bytes); void *ptr = mallocx(usize, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); size_t tcache_bytes_before = tcache_bytes + tcache_stashed_bytes; dallocx(ptr, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." 
STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); size_t tcache_bytes_after = tcache_bytes + tcache_stashed_bytes; - assert_zu_eq(tcache_bytes_after - tcache_bytes_before, - usize, "Incorrectly attributed a free"); + assert_zu_eq(tcache_bytes_after - tcache_bytes_before, usize, + "Incorrectly attributed a free"); } TEST_BEGIN(test_stats_tcache_bytes_small) { @@ -436,14 +447,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_stats_summary, - test_stats_large, - test_stats_arenas_summary, - test_stats_arenas_small, - test_stats_arenas_large, - test_stats_arenas_bins, - test_stats_arenas_lextents, - test_stats_tcache_bytes_small, + return test_no_reentrancy(test_stats_summary, test_stats_large, + test_stats_arenas_summary, test_stats_arenas_small, + test_stats_arenas_large, test_stats_arenas_bins, + test_stats_arenas_lextents, test_stats_tcache_bytes_small, test_stats_tcache_bytes_large); } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 3b317753..e611369c 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -21,22 +21,22 @@ typedef enum { typedef struct parser_s parser_t; typedef struct { - parser_t *parser; - token_type_t token_type; - size_t pos; - size_t len; - size_t line; - size_t col; + parser_t *parser; + token_type_t token_type; + size_t pos; + size_t len; + size_t line; + size_t col; } token_t; struct parser_s { - bool verbose; - char *buf; /* '\0'-terminated. */ - size_t len; /* Number of characters preceding '\0' in buf. */ - size_t pos; - size_t line; - size_t col; - token_t token; + bool verbose; + char *buf; /* '\0'-terminated. */ + size_t len; /* Number of characters preceding '\0' in buf. 
*/ + size_t pos; + size_t line; + size_t col; + token_t token; }; static void @@ -63,12 +63,12 @@ token_error(token_t *token) { token->line, token->col); break; default: - malloc_printf("%zu:%zu: Unexpected token: ", token->line, - token->col); + malloc_printf( + "%zu:%zu: Unexpected token: ", token->line, token->col); break; } - UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO, - &token->parser->buf[token->pos], token->len); + UNUSED ssize_t err = malloc_write_fd( + STDERR_FILENO, &token->parser->buf[token->pos], token->len); malloc_printf("\n"); } @@ -92,9 +92,9 @@ parser_fini(parser_t *parser) { static bool parser_append(parser_t *parser, const char *str) { size_t len = strlen(str); - char *buf = (parser->buf == NULL) ? mallocx(len + 1, - MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, - MALLOCX_TCACHE_NONE); + char *buf = (parser->buf == NULL) + ? mallocx(len + 1, MALLOCX_TCACHE_NONE) + : rallocx(parser->buf, parser->len + len + 1, MALLOCX_TCACHE_NONE); if (buf == NULL) { return true; } @@ -109,9 +109,19 @@ parser_tokenize(parser_t *parser) { enum { STATE_START, STATE_EOI, - STATE_N, STATE_NU, STATE_NUL, STATE_NULL, - STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE, - STATE_T, STATE_TR, STATE_TRU, STATE_TRUE, + STATE_N, + STATE_NU, + STATE_NUL, + STATE_NULL, + STATE_F, + STATE_FA, + STATE_FAL, + STATE_FALS, + STATE_FALSE, + STATE_T, + STATE_TR, + STATE_TRU, + STATE_TRUE, STATE_LBRACKET, STATE_RBRACKET, STATE_LBRACE, @@ -120,7 +130,10 @@ parser_tokenize(parser_t *parser) { STATE_COMMA, STATE_CHARS, STATE_CHAR_ESCAPE, - STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD, + STATE_CHAR_U, + STATE_CHAR_UD, + STATE_CHAR_UDD, + STATE_CHAR_UDDD, STATE_STRING, STATE_MINUS, STATE_LEADING_ZERO, @@ -132,12 +145,12 @@ parser_tokenize(parser_t *parser) { STATE_EXP_DIGITS, STATE_ACCEPT } state = STATE_START; - size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); + size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); size_t token_line 
JEMALLOC_CC_SILENCE_INIT(1); - size_t token_col JEMALLOC_CC_SILENCE_INIT(0); + size_t token_col JEMALLOC_CC_SILENCE_INIT(0); - expect_zu_le(parser->pos, parser->len, - "Position is past end of buffer"); + expect_zu_le( + parser->pos, parser->len, "Position is past end of buffer"); while (state != STATE_ACCEPT) { char c = parser->buf[parser->pos]; @@ -148,7 +161,11 @@ parser_tokenize(parser_t *parser) { token_line = parser->line; token_col = parser->col; switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': break; case '\0': state = STATE_EOI; @@ -189,21 +206,29 @@ parser_tokenize(parser_t *parser) { case '0': state = STATE_LEADING_ZERO; break; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EOI: - token_init(&parser->token, parser, - TOKEN_TYPE_EOI, token_pos, parser->pos - - token_pos, token_line, token_col); + token_init(&parser->token, parser, TOKEN_TYPE_EOI, + token_pos, parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; case STATE_N: @@ -213,8 +238,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -225,8 +251,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + 
TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -237,22 +264,32 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_NULL: switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } token_init(&parser->token, parser, TOKEN_TYPE_NULL, @@ -267,8 +304,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -279,8 +317,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -291,8 +330,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -303,27 +343,37 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, 
parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_FALSE: switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } - token_init(&parser->token, parser, - TOKEN_TYPE_FALSE, token_pos, parser->pos - - token_pos, token_line, token_col); + token_init(&parser->token, parser, TOKEN_TYPE_FALSE, + token_pos, parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; case STATE_T: @@ -333,8 +383,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -345,8 +396,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -357,22 +409,32 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_TRUE: switch (c) { - case ' ': case '\b': case '\n': case '\r': case 
'\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } token_init(&parser->token, parser, TOKEN_TYPE_TRUE, @@ -424,16 +486,42 @@ parser_tokenize(parser_t *parser) { case '"': state = STATE_STRING; break; - case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: - case 0x05: case 0x06: case 0x07: case 0x08: case 0x09: - case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: - case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: - case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: - case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: - case 0x1e: case 0x1f: + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; default: break; @@ -441,8 +529,13 @@ parser_tokenize(parser_t *parser) { break; case STATE_CHAR_ESCAPE: switch (c) { - case '"': case '\\': case '/': case 'b': case 'n': - case 'r': case 't': + case '"': + case '\\': + case '/': + case 'b': + case 'n': + case 'r': + case 't': state = STATE_CHARS; break; case 'u': @@ -450,76 +543,145 @@ parser_tokenize(parser_t *parser) { break; default: 
token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_U: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UDD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UDD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': 
+ case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UDDD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UDDD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHARS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -534,14 +696,22 @@ parser_tokenize(parser_t *parser) { case '0': state = STATE_LEADING_ZERO; break; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -552,95 +722,152 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, 
parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; case '.': state = STATE_DECIMAL; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_DECIMAL: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_FRAC_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_FRAC_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; - case 'e': case 'E': + case 'e': + case 'E': state = STATE_EXP; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_EXP: switch (c) { - case '-': case '+': + case '-': + case '+': state = STATE_EXP_SIGN; break; - 
case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_EXP_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EXP_SIGN: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_EXP_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EXP_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } @@ -662,8 +889,8 @@ parser_tokenize(parser_t *parser) { return false; } -static bool parser_parse_array(parser_t *parser); -static bool parser_parse_object(parser_t *parser); +static bool parser_parse_array(parser_t *parser); +static bool parser_parse_object(parser_t *parser); static bool parser_parse_value(parser_t *parser) { @@ -824,80 +1051,80 @@ label_error: } TEST_BEGIN(test_json_parser) { - size_t i; + size_t i; const char *invalid_inputs[] = { - /* 
Tokenizer error case tests. */ - "{ \"string\": X }", - "{ \"string\": nXll }", - "{ \"string\": nuXl }", - "{ \"string\": nulX }", - "{ \"string\": nullX }", - "{ \"string\": fXlse }", - "{ \"string\": faXse }", - "{ \"string\": falXe }", - "{ \"string\": falsX }", - "{ \"string\": falseX }", - "{ \"string\": tXue }", - "{ \"string\": trXe }", - "{ \"string\": truX }", - "{ \"string\": trueX }", - "{ \"string\": \"\n\" }", - "{ \"string\": \"\\z\" }", - "{ \"string\": \"\\uX000\" }", - "{ \"string\": \"\\u0X00\" }", - "{ \"string\": \"\\u00X0\" }", - "{ \"string\": \"\\u000X\" }", - "{ \"string\": -X }", - "{ \"string\": 0.X }", - "{ \"string\": 0.0eX }", - "{ \"string\": 0.0e+X }", + /* Tokenizer error case tests. */ + "{ \"string\": X }", + "{ \"string\": nXll }", + "{ \"string\": nuXl }", + "{ \"string\": nulX }", + "{ \"string\": nullX }", + "{ \"string\": fXlse }", + "{ \"string\": faXse }", + "{ \"string\": falXe }", + "{ \"string\": falsX }", + "{ \"string\": falseX }", + "{ \"string\": tXue }", + "{ \"string\": trXe }", + "{ \"string\": truX }", + "{ \"string\": trueX }", + "{ \"string\": \"\n\" }", + "{ \"string\": \"\\z\" }", + "{ \"string\": \"\\uX000\" }", + "{ \"string\": \"\\u0X00\" }", + "{ \"string\": \"\\u00X0\" }", + "{ \"string\": \"\\u000X\" }", + "{ \"string\": -X }", + "{ \"string\": 0.X }", + "{ \"string\": 0.0eX }", + "{ \"string\": 0.0e+X }", - /* Parser error test cases. */ - "{\"string\": }", - "{\"string\" }", - "{\"string\": [ 0 }", - "{\"string\": {\"a\":0, 1 } }", - "{\"string\": {\"a\":0: } }", - "{", - "{}{", + /* Parser error test cases. */ + "{\"string\": }", + "{\"string\" }", + "{\"string\": [ 0 }", + "{\"string\": {\"a\":0, 1 } }", + "{\"string\": {\"a\":0: } }", + "{", + "{}{", }; const char *valid_inputs[] = { - /* Token tests. 
*/ - "null", - "false", - "true", - "{}", - "{\"a\": 0}", - "[]", - "[0, 1]", - "0", - "1", - "10", - "-10", - "10.23", - "10.23e4", - "10.23e-4", - "10.23e+4", - "10.23E4", - "10.23E-4", - "10.23E+4", - "-10.23", - "-10.23e4", - "-10.23e-4", - "-10.23e+4", - "-10.23E4", - "-10.23E-4", - "-10.23E+4", - "\"value\"", - "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", + /* Token tests. */ + "null", + "false", + "true", + "{}", + "{\"a\": 0}", + "[]", + "[0, 1]", + "0", + "1", + "10", + "-10", + "10.23", + "10.23e4", + "10.23e-4", + "10.23e+4", + "10.23E4", + "10.23E-4", + "10.23E+4", + "-10.23", + "-10.23e4", + "-10.23e-4", + "-10.23e+4", + "-10.23E4", + "-10.23E-4", + "-10.23E+4", + "\"value\"", + "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", - /* Parser test with various nesting. */ - "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", + /* Parser test with various nesting. */ + "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", }; - for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(invalid_inputs) / sizeof(const char *); i++) { const char *input = invalid_inputs[i]; - parser_t parser; + parser_t parser; parser_init(&parser, false); expect_false(parser_append(&parser, input), "Unexpected input appending failure"); @@ -906,9 +1133,9 @@ TEST_BEGIN(test_json_parser) { parser_fini(&parser); } - for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(valid_inputs) / sizeof(const char *); i++) { const char *input = valid_inputs[i]; - parser_t parser; + parser_t parser; parser_init(&parser, true); expect_false(parser_append(&parser, input), "Unexpected input appending failure"); @@ -929,27 +1156,27 @@ write_cb(void *opaque, const char *str) { TEST_BEGIN(test_stats_print_json) { const char *opts[] = { - "J", - "Jg", - "Jm", - "Jd", - "Jmd", - "Jgd", - "Jgm", - "Jgmd", - "Ja", - "Jb", - "Jl", - "Jx", - "Jbl", - "Jal", - "Jab", - "Jabl", - "Jax", - "Jbx", - "Jlx", - "Jablx", 
- "Jgmdablx", + "J", + "Jg", + "Jm", + "Jd", + "Jmd", + "Jgd", + "Jgm", + "Jgmd", + "Ja", + "Jb", + "Jl", + "Jx", + "Jbl", + "Jal", + "Jab", + "Jabl", + "Jax", + "Jbx", + "Jlx", + "Jablx", + "Jgmdablx", }; unsigned arena_ind, i; @@ -962,23 +1189,27 @@ TEST_BEGIN(test_stats_print_json) { case 1: { size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl failure"); break; - } case 2: { + } + case 2: { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", - mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); + size_t miblen = sizeof(mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.destroy", mib, &miblen), + 0, "Unexpected mallctlnametomib failure"); mib[1] = arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq( + mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib failure"); break; - } default: + } + default: not_reached(); } - for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) { + for (j = 0; j < sizeof(opts) / sizeof(const char *); j++) { parser_t parser; parser_init(&parser, true); @@ -993,7 +1224,5 @@ TEST_END int main(void) { - return test( - test_json_parser, - test_stats_print_json); + return test(test_json_parser, test_stats_print_json); } diff --git a/test/unit/sz.c b/test/unit/sz.c index 8ae04b92..fa2b8dc0 100644 --- a/test/unit/sz.c +++ b/test/unit/sz.c @@ -10,8 +10,8 @@ TEST_BEGIN(test_sz_psz2ind) { for (size_t i = 0; i < SC_NGROUP; i++) { for (size_t psz = i * PAGE + 1; psz <= (i + 1) * PAGE; psz++) { pszind_t ind = sz_psz2ind(psz); - expect_zu_eq(ind, i, "Got %u as sz_psz2ind of %zu", ind, - psz); + expect_zu_eq( + ind, i, "Got %u as sz_psz2ind of %zu", ind, psz); } } @@ -25,15 +25,14 @@ TEST_BEGIN(test_sz_psz2ind) { */ size_t base_psz = 1 << 
(SC_LG_NGROUP + LG_PAGE); size_t base_ind = 0; - while (base_ind < SC_NSIZES && - reg_size_compute(data.sc[base_ind].lg_base, - data.sc[base_ind].lg_delta, - data.sc[base_ind].ndelta) < base_psz) { + while (base_ind < SC_NSIZES + && reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta) + < base_psz) { base_ind++; } - expect_zu_eq( - reg_size_compute(data.sc[base_ind].lg_base, - data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), + expect_zu_eq(reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), base_psz, "Size class equal to %zu not found", base_psz); /* * Test different sizes falling into groups after the 'base'. The @@ -42,21 +41,21 @@ TEST_BEGIN(test_sz_psz2ind) { base_ind -= SC_NGROUP; for (size_t psz = base_psz; psz <= 64 * 1024 * 1024; psz += PAGE / 3) { pszind_t ind = sz_psz2ind(psz); - sc_t gt_sc = data.sc[ind + base_ind]; + sc_t gt_sc = data.sc[ind + base_ind]; expect_zu_gt(psz, - reg_size_compute(gt_sc.lg_base, gt_sc.lg_delta, - gt_sc.ndelta), + reg_size_compute( + gt_sc.lg_base, gt_sc.lg_delta, gt_sc.ndelta), "Got %u as sz_psz2ind of %zu", ind, psz); sc_t le_sc = data.sc[ind + base_ind + 1]; expect_zu_le(psz, - reg_size_compute(le_sc.lg_base, le_sc.lg_delta, - le_sc.ndelta), + reg_size_compute( + le_sc.lg_base, le_sc.lg_delta, le_sc.ndelta), "Got %u as sz_psz2ind of %zu", ind, psz); } pszind_t max_ind = sz_psz2ind(SC_LARGE_MAXCLASS + 1); - expect_lu_eq(max_ind, SC_NPSIZES, - "Got %u as sz_psz2ind of %llu", max_ind, SC_LARGE_MAXCLASS); + expect_lu_eq(max_ind, SC_NPSIZES, "Got %u as sz_psz2ind of %llu", + max_ind, SC_LARGE_MAXCLASS); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 884ee7fe..d57b2d3b 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -69,8 +69,8 @@ tcache_bytes_read_global(void) { static size_t tcache_bytes_read_local(void) { - size_t tcache_bytes = 0; - tsd_t *tsd = tsd_fetch(); + size_t 
tcache_bytes = 0; + tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tcache_get(tsd); for (szind_t i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; @@ -98,7 +98,7 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, size_t usize = sz_s2u(alloc_size); /* No change is expected if usize is outside of tcache_max range. */ - bool cached = (usize <= tcache_max); + bool cached = (usize <= tcache_max); ssize_t diff = cached ? usize : 0; void *ptr1 = alloc_func(alloc_size, alloc_option); @@ -186,7 +186,7 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(san_uaf_detection_enabled()); unsigned arena_ind, alloc_option, dalloc_option; - size_t sz = sizeof(arena_ind); + size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); expect_d_eq( @@ -215,12 +215,12 @@ static void validate_tcache_stack(tcache_t *tcache) { /* Assume bins[0] is enabled. */ void *tcache_stack = tcache->bins[0].stack_head; - bool expect_found = cache_bin_stack_use_thp() ? true : false; + bool expect_found = cache_bin_stack_use_thp() ? true : false; /* Walk through all blocks to see if the stack is within range. 
*/ - base_t *base = b0get(); + base_t *base = b0get(); base_block_t *next = base->blocks; - bool found = false; + bool found = false; do { base_block_t *block = next; if ((byte_t *)tcache_stack >= (byte_t *)block @@ -237,10 +237,10 @@ validate_tcache_stack(tcache_t *tcache) { static void * tcache_check(void *arg) { - size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; - unsigned tcache_nbins; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); + size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; + unsigned tcache_nbins; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); tcache_slow_t *tcache_slow = tcache->tcache_slow; sz = sizeof(size_t); new_tcache_max = *(size_t *)arg; @@ -263,7 +263,7 @@ tcache_check(void *arg) { * Test an input that is not a valid size class, it should be ceiled * to a valid size class. */ - bool e0 = false, e1; + bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, (void *)&e0, bool_sz), diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 41e7bf35..47e5fa9e 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -32,7 +32,5 @@ TEST_END int main(void) { - return test( - unhooked_call, - hooked_call); + return test(unhooked_call, hooked_call); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 66d61cd2..d886c998 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,20 +1,17 @@ #include "test/jemalloc_test.h" static uint32_t nuser_hook_calls; -static bool is_registered = false; +static bool is_registered = false; static void test_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { ++nuser_hook_calls; } static user_hook_object_t tobj = { - .callback = &test_cb, - .interval = 10, - .is_alloc_only = false -}; + .callback = &test_cb, .interval = 10, .is_alloc_only = false}; TEST_BEGIN(test_next_event_fast) { - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = 
tsd_fetch(); te_ctx_t ctx; te_ctx_get(tsd, &ctx, true); @@ -23,7 +20,8 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); if (!is_registered) { - is_registered = 0 == te_register_user_handler(tsd_tsdn(tsd), &tobj); + is_registered = 0 + == te_register_user_handler(tsd_tsdn(tsd), &tobj); } assert_true(is_registered || !config_stats, "Register user handler"); nuser_hook_calls = 0; @@ -35,7 +33,8 @@ TEST_BEGIN(test_next_event_fast) { /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); - assert_true(nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); + assert_true( + nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); assert_ptr_not_null(p, "malloc() failed"); free(p); @@ -48,6 +47,5 @@ TEST_END int main(void) { - return test( - test_next_event_fast); + return test(test_next_event_fast); } diff --git a/test/unit/ticker.c b/test/unit/ticker.c index c4147a0c..31a2b8e0 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_ticker_tick) { #define NREPS 2 #define NTICKS 3 ticker_t ticker; - int32_t i, j; + int32_t i, j; ticker_init(&ticker, NTICKS); for (i = 0; i < NREPS; i++) { @@ -16,12 +16,12 @@ TEST_BEGIN(test_ticker_tick) { expect_false(ticker_tick(&ticker, false), "Unexpected ticker fire (i=%d, j=%d)", i, j); } - expect_u32_eq(ticker_read(&ticker), 0, - "Expected ticker depletion"); + expect_u32_eq( + ticker_read(&ticker), 0, "Expected ticker depletion"); expect_true(ticker_tick(&ticker, false), "Expected ticker fire (i=%d)", i); - expect_u32_eq(ticker_read(&ticker), NTICKS, - "Expected ticker reset"); + expect_u32_eq( + ticker_read(&ticker), NTICKS, "Expected ticker reset"); } #undef NTICKS } @@ -34,15 +34,15 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_false(ticker_ticks(&ticker, NTICKS, false), - "Unexpected ticker fire"); + expect_false( + 
ticker_ticks(&ticker, NTICKS, false), "Unexpected ticker fire"); expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&ticker, NTICKS, false), "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS + 1, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&ticker, NTICKS + 1, false), "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -55,8 +55,8 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS + 1, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&tb, NTICKS + 1, false), "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); ticker_tick(&ta, false); @@ -69,7 +69,7 @@ TEST_BEGIN(test_ticker_copy) { TEST_END TEST_BEGIN(test_ticker_geom) { - const int32_t ticks = 100; + const int32_t ticks = 100; const uint64_t niters = 100 * 1000; ticker_geom_t ticker; @@ -78,7 +78,7 @@ TEST_BEGIN(test_ticker_geom) { /* Just some random constant. */ uint64_t prng_state = 0x343219f93496db9fULL; for (uint64_t i = 0; i < niters; i++) { - while(!ticker_geom_tick(&ticker, &prng_state, false)) { + while (!ticker_geom_tick(&ticker, &prng_state, false)) { total_ticks++; } } @@ -87,15 +87,15 @@ TEST_BEGIN(test_ticker_geom) { * used at the time this was tested, total_ticks is 95.1% of the * expected ticks. 
*/ - expect_u64_ge(total_ticks , niters * ticks * 9 / 10, - "Mean off by > 10%%"); - expect_u64_le(total_ticks , niters * ticks * 11 / 10, - "Mean off by > 10%%"); + expect_u64_ge( + total_ticks, niters * ticks * 9 / 10, "Mean off by > 10%%"); + expect_u64_le( + total_ticks, niters * ticks * 11 / 10, "Mean off by > 10%%"); } TEST_END TEST_BEGIN(test_ticker_delay) { - const int32_t ticks = 1000; + const int32_t ticks = 1000; const uint64_t niters = 10000; ticker_t t1; @@ -120,22 +120,19 @@ TEST_BEGIN(test_ticker_delay) { expect_false(ticker_geom_tick(&t2, &prng_state, delay), "Unexpected ticker fire"); expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); - expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + expect_d_eq( + ticker_geom_read(&t2), 0, "Unexpected ticker value"); } delay = false; expect_true(ticker_tick(&t1, delay), "Expected ticker fire"); - expect_true(ticker_geom_tick(&t2, &prng_state, delay), - "Expected ticker fire"); + expect_true( + ticker_geom_tick(&t2, &prng_state, delay), "Expected ticker fire"); } TEST_END int main(void) { - return test( - test_ticker_tick, - test_ticker_ticks, - test_ticker_copy, - test_ticker_geom, - test_ticker_delay); + return test(test_ticker_tick, test_ticker_ticks, test_ticker_copy, + test_ticker_geom, test_ticker_delay); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index bb5cd9f6..9610ceac 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -5,7 +5,7 @@ * be asserting that we're on one. */ static bool originally_fast; -static int data_cleanup_count; +static int data_cleanup_count; void data_cleanup(int *data) { @@ -45,7 +45,7 @@ data_cleanup(int *data) { static void * thd_start(void *arg) { - int d = (int)(uintptr_t)arg; + int d = (int)(uintptr_t)arg; void *p; /* @@ -105,11 +105,10 @@ thd_start_reincarnated(void *arg) { expect_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. 
*/ - expect_ptr_not_null(tsd_arena_get(tsd), - "Should have tsd arena set."); + expect_ptr_not_null(tsd_arena_get(tsd), "Should have tsd arena set."); tsd_cleanup((void *)tsd); - expect_ptr_null(*tsd_arenap_get_unsafe(tsd), - "TSD arena should have been cleared."); + expect_ptr_null( + *tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared."); expect_u_eq(tsd_state_get(tsd), tsd_state_purgatory, "TSD state should be purgatory\n"); @@ -193,7 +192,7 @@ TEST_END typedef struct { atomic_u32_t phase; - atomic_b_t error; + atomic_b_t error; } global_slow_data_t; static void * @@ -207,8 +206,8 @@ thd_start_global_slow(void *arg) { * No global slowness has happened yet; there was an error if we were * originally fast but aren't now. */ - atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), - ATOMIC_SEQ_CST); + atomic_store_b( + &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST); atomic_store_u32(&data->phase, 1, ATOMIC_SEQ_CST); /* PHASE 2 */ @@ -241,8 +240,8 @@ thd_start_global_slow(void *arg) { * Both decrements happened; we should be fast again (if we ever * were) */ - atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), - ATOMIC_SEQ_CST); + atomic_store_b( + &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST); atomic_store_u32(&data->phase, 9, ATOMIC_SEQ_CST); return NULL; @@ -321,10 +320,7 @@ main(void) { return test_status_fail; } - return test_no_reentrancy( - test_tsd_main_thread, - test_tsd_sub_thread, - test_tsd_sub_thread_dalloc_only, - test_tsd_reincarnation, + return test_no_reentrancy(test_tsd_main_thread, test_tsd_sub_thread, + test_tsd_sub_thread_dalloc_only, test_tsd_reincarnation, test_tsd_global_slow); } diff --git a/test/unit/uaf.c b/test/unit/uaf.c index a8433c29..25399ed0 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -11,7 +11,8 @@ const char *malloc_conf = TEST_SAN_UAF_ALIGN_ENABLE; static size_t san_uaf_align; static bool fake_abort_called; -void fake_abort(const char *message) { 
+void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -24,8 +25,8 @@ test_write_after_free_pre(void) { static void test_write_after_free_post(void) { - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - 0, "Unexpected tcache flush failure"); + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); expect_true(fake_abort_called, "Use-after-free check didn't fire."); safety_check_set_abort(NULL); } @@ -37,9 +38,10 @@ uaf_detection_enabled(void) { } ssize_t lg_san_uaf_align; - size_t sz = sizeof(lg_san_uaf_align); - assert_d_eq(mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(lg_san_uaf_align); + assert_d_eq( + mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); if (lg_san_uaf_align < 0) { return false; } @@ -48,8 +50,9 @@ uaf_detection_enabled(void) { bool tcache_enabled; sz = sizeof(tcache_enabled); - assert_d_eq(mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + assert_d_eq( + mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); if (!tcache_enabled) { return false; } @@ -69,10 +72,10 @@ read_tcache_stashed_bytes(unsigned arena_ind) { size_t tcache_stashed_bytes; size_t sz = sizeof(tcache_stashed_bytes); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return tcache_stashed_bytes; } @@ -91,17 +94,17 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { * make use-after-free tolerable. 
*/ unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; size_t n_max = san_uaf_align * 2; void **items = mallocx(n_max * sizeof(void *), flags); assert_ptr_not_null(items, "Unexpected mallocx failure"); - bool found = false; + bool found = false; size_t iter = 0; - char magic = 's'; - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - 0, "Unexpected tcache flush failure"); + char magic = 's'; + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); while (!found) { ptr = mallocx(alloc_size, flags); assert_ptr_not_null(ptr, "Unexpected mallocx failure"); @@ -194,7 +197,7 @@ static bool check_allocated_intact(void **allocated, size_t n_alloc) { for (unsigned i = 0; i < n_alloc; i++) { void *ptr = *(void **)allocated[i]; - bool found = false; + bool found = false; for (unsigned j = 0; j < n_alloc; j++) { if (ptr == allocated[j]) { found = true; @@ -213,7 +216,7 @@ TEST_BEGIN(test_use_after_free_integration) { test_skip_if(!uaf_detection_enabled()); unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind); + int flags = MALLOCX_ARENA(arena_ind); size_t n_alloc = san_uaf_align * 2; void **allocated = mallocx(n_alloc * sizeof(void *), flags); @@ -255,8 +258,6 @@ TEST_END int main(void) { - return test( - test_read_after_free, - test_write_after_free, + return test(test_read_after_free, test_write_after_free, test_use_after_free_integration); } diff --git a/test/unit/witness.c b/test/unit/witness.c index 5a6c4482..ccefb5a2 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -static witness_lock_error_t *witness_lock_error_orig; -static witness_owner_error_t *witness_owner_error_orig; +static witness_lock_error_t *witness_lock_error_orig; +static witness_owner_error_t *witness_owner_error_orig; static 
witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_depth_error_t *witness_depth_error_orig; +static witness_depth_error_t *witness_depth_error_orig; static bool saw_lock_error; static bool saw_owner_error; @@ -11,8 +11,8 @@ static bool saw_not_owner_error; static bool saw_depth_error; static void -witness_lock_error_intercept(const witness_list_t *witnesses, - const witness_t *witness) { +witness_lock_error_intercept( + const witness_list_t *witnesses, const witness_t *witness) { saw_lock_error = true; } @@ -43,8 +43,8 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { } static int -witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, - void *ob) { +witness_comp_reverse( + const witness_t *a, void *oa, const witness_t *b, void *ob) { expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -54,8 +54,8 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, } TEST_BEGIN(test_witness) { - witness_t a, b; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -94,8 +94,8 @@ TEST_BEGIN(test_witness) { TEST_END TEST_BEGIN(test_witness_comp) { - witness_t a, b, c, d; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b, c, d; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -146,8 +146,8 @@ TEST_BEGIN(test_witness_comp) { TEST_END TEST_BEGIN(test_witness_reversal) { - witness_t a, b; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -177,8 +177,8 @@ TEST_BEGIN(test_witness_reversal) { TEST_END TEST_BEGIN(test_witness_recursive) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = 
{WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -207,13 +207,12 @@ TEST_BEGIN(test_witness_recursive) { witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; - } TEST_END TEST_BEGIN(test_witness_unlock_not_owned) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -236,8 +235,8 @@ TEST_BEGIN(test_witness_unlock_not_owned) { TEST_END TEST_BEGIN(test_witness_depth) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -270,11 +269,7 @@ TEST_END int main(void) { - return test( - test_witness, - test_witness_comp, - test_witness_reversal, - test_witness_recursive, - test_witness_unlock_not_owned, + return test(test_witness, test_witness_comp, test_witness_reversal, + test_witness_recursive, test_witness_unlock_not_owned, test_witness_depth); } diff --git a/test/unit/zero.c b/test/unit/zero.c index d3e81f1b..522d6908 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -3,35 +3,35 @@ static void test_zero(size_t sz_min, size_t sz_max) { uint8_t *s; - size_t sz_prev, sz, i; -#define MAGIC ((uint8_t)0x61) + size_t sz_prev, sz, i; +#define MAGIC ((uint8_t)0x61) sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; - sz_prev = sz, sz = sallocx(s, 0)) { + sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { expect_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - expect_u_eq(s[sz_prev-1], MAGIC, + expect_u_eq(s[sz_prev - 1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", - sz_prev-1, sz_prev); + sz_prev - 1, sz_prev); } for (i = sz_prev; i < sz; i++) { expect_u_eq(s[i], 0x0, - "Newly allocated byte %zu/%zu isn't 
zero-filled", - i, sz); + "Newly allocated byte %zu/%zu isn't zero-filled", i, + sz); s[i] = MAGIC; } - if (xallocx(s, sz+1, 0, 0) == sz) { - s = (uint8_t *)rallocx(s, sz+1, 0); - expect_ptr_not_null((void *)s, - "Unexpected rallocx() failure"); + if (xallocx(s, sz + 1, 0, 0) == sz) { + s = (uint8_t *)rallocx(s, sz + 1, 0); + expect_ptr_not_null( + (void *)s, "Unexpected rallocx() failure"); } } @@ -53,7 +53,5 @@ TEST_END int main(void) { - return test( - test_zero_small, - test_zero_large); + return test(test_zero_small, test_zero_large); } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index f014cdc2..1d8bf9c3 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -4,7 +4,8 @@ static bool abort_called = false; -void set_abort_called(const char *message) { +void +set_abort_called(const char *message) { (void)message; abort_called = true; }; @@ -21,7 +22,5 @@ TEST_END int main(void) { - return test( - test_realloc_abort); + return test(test_realloc_abort); } - diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c index 6954818c..5b4f985f 100644 --- a/test/unit/zero_realloc_alloc.c +++ b/test/unit/zero_realloc_alloc.c @@ -6,9 +6,10 @@ allocated(void) { return 0; } uint64_t allocated; - size_t sz = sizeof(allocated); - expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(allocated); + expect_d_eq( + mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); return allocated; } @@ -18,9 +19,10 @@ deallocated(void) { return 0; } uint64_t deallocated; - size_t sz = sizeof(deallocated); - expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(deallocated); + expect_d_eq( + mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return deallocated; 
} @@ -43,6 +45,5 @@ TEST_BEGIN(test_realloc_alloc) { TEST_END int main(void) { - return test( - test_realloc_alloc); + return test(test_realloc_alloc); } diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index 277f219d..c2aa0afa 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -6,9 +6,10 @@ deallocated(void) { return 0; } uint64_t deallocated; - size_t sz = sizeof(deallocated); - expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(deallocated); + expect_d_eq( + mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return deallocated; } @@ -28,6 +29,5 @@ TEST_END int main(void) { - return test( - test_realloc_free); + return test(test_realloc_free); } diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index a9077222..6c4a51d6 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -8,8 +8,9 @@ zero_reallocs(void) { size_t count = 12345; size_t sz = sizeof(count); - expect_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("stats.zero_reallocs", (void *)&count, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); return count; } @@ -35,6 +36,5 @@ main(void) { * We expect explicit counts; reentrant tests run multiple times, so * counts leak across runs. */ - return test_no_reentrancy( - test_zero_reallocs); + return test_no_reentrancy(test_zero_reallocs); } From 5847516692b4022fa8e0fe333f6e676ae48f02a7 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 20 Jun 2025 14:41:13 -0700 Subject: [PATCH 306/395] Ignore the clang-format changes in the git blame. 
--- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 365e8bb1..7f5f6975 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1 +1,2 @@ 554185356bf990155df8d72060c4efe993642baf +34f359e0ca613b5f9d970e9b2152a5203c9df8d6 From 711fff750ce904d0b881a6fe534732dcb75874e6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 9 Jun 2025 21:29:55 -0700 Subject: [PATCH 307/395] Add experimental support for usdt systemtap probes --- configure.ac | 49 ++++++ .../internal/jemalloc_internal_defs.h.in | 9 ++ include/jemalloc/internal/jemalloc_probe.h | 49 ++++++ .../jemalloc/internal/jemalloc_probe_custom.h | 148 ++++++++++++++++++ .../jemalloc/internal/jemalloc_probe_stap.h | 11 ++ 5 files changed, 266 insertions(+) create mode 100644 include/jemalloc/internal/jemalloc_probe.h create mode 100644 include/jemalloc/internal/jemalloc_probe_custom.h create mode 100644 include/jemalloc/internal/jemalloc_probe_stap.h diff --git a/configure.ac b/configure.ac index c615cab2..c703a6d1 100644 --- a/configure.ac +++ b/configure.ac @@ -1681,6 +1681,55 @@ else fi AC_SUBST([enable_utrace]) +dnl Disable experimental sdt tracing by default. 
+AC_ARG_ENABLE([experimental-sdt],
+  [AS_HELP_STRING([--enable-experimental-sdt], [Enable systemtap USDT probes])],
+[if test "x$enable_experimental_sdt" = "xno" ; then
+  enable_experimental_sdt="0"
+else
+  JE_COMPILABLE([systemtap sdt], [
+#include <sys/sdt.h>
+ ], [
+void foo(int i, void *p) { STAP_PROBE2(jemalloc, test, i, p); }
+ ],
+ [je_cv_stap_sdt])
+
+  if test "x${je_cv_stap_sdt}" = "xyes" ; then
+    enable_experimental_sdt="1"
+  elif test "x${abi}" = "xelf" ; then
+    case "${host}" in
+      *-*-linux-android*)
+        case "${host_cpu}" in aarch64|x86_64)
+          enable_experimental_sdt="2"
+          ;;
+        esac
+        ;;
+      *-*-linux*)
+        case "${host_cpu}" in x86_64|aarch64|arm*)
+          enable_experimental_sdt="2"
+          ;;
+        esac
+        ;;
+      *)
+        enable_experimental_sdt="0"
+        AC_MSG_ERROR([Unsupported sdt on this platform])
+        ;;
+    esac
+  else
+    AC_MSG_ERROR([Unsupported sdt on this platform])
+  fi
+fi
+],
+[enable_experimental_sdt="0"]
+)
+
+if test "x$enable_experimental_sdt" = "x1" ; then
+  AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_STAP], [ ], [ ])
+elif test "x$enable_experimental_sdt" = "x2"; then
+  AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_CUSTOM], [ ], [ ])
+fi
+AC_SUBST([enable_experimental_sdt])
+
 dnl Do not support the xmalloc option by default.
 AC_ARG_ENABLE([xmalloc],
   [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 6d557959..31ae2e8e 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -353,6 +353,15 @@
 /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT +/* Defined if sys/sdt.h is available and sdt tracing enabled */ +#undef JEMALLOC_EXPERIMENTAL_USDT_STAP + +/* + * Defined if sys/sdt.h is unavailable, sdt tracing enabled, and + * platform is supported + */ +#undef JEMALLOC_EXPERIMENTAL_USDT_CUSTOM + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. diff --git a/include/jemalloc/internal/jemalloc_probe.h b/include/jemalloc/internal/jemalloc_probe.h new file mode 100644 index 00000000..8ef3105d --- /dev/null +++ b/include/jemalloc/internal/jemalloc_probe.h @@ -0,0 +1,49 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_H +#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_H + +#include + +#ifdef JEMALLOC_EXPERIMENTAL_USDT_STAP +#include +#elif defined(JEMALLOC_EXPERIMENTAL_USDT_CUSTOM) +#include +#elif defined(_MSC_VER) +#define JE_USDT(name, N, ...) /* Nothing */ +#else /* no USDT, just check the args */ + +#define JE_USDT(name, N, ...) 
_JE_USDT_CHECK_ARG##N(__VA_ARGS__) + +#define _JE_USDT_CHECK_ARG1(a) \ + do { \ + (void)(a); \ + } while (0) +#define _JE_USDT_CHECK_ARG2(a, b) \ + do { \ + (void)(a); \ + (void)(b); \ + } while (0) +#define _JE_USDT_CHECK_ARG3(a, b, c) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + } while (0) +#define _JE_USDT_CHECK_ARG4(a, b, c, d) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + (void)(d); \ + } while (0) +#define _JE_USDT_CHECK_ARG5(a, b, c, d, e) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + (void)(d); \ + (void)(e); \ + } while (0) + +#endif /* JEMALLOC_EXPERIMENTAL_USDT_* */ + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_H */ diff --git a/include/jemalloc/internal/jemalloc_probe_custom.h b/include/jemalloc/internal/jemalloc_probe_custom.h new file mode 100644 index 00000000..3c22749f --- /dev/null +++ b/include/jemalloc/internal/jemalloc_probe_custom.h @@ -0,0 +1,148 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H +#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H + +/* clang-format off */ + +/* + * This section is based on sys/sdt.h and + * https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + */ + +/* Emit NOP for the probe. 
*/ +#if (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(__arm__)) && defined(__linux__) +#define JE_SDT_NOP nop +#else +#error "Architecture not supported" +#endif + +/* Assembly macros */ +#define JE_SDT_S(x) #x + +#define JE_SDT_ASM_1(x) JE_SDT_S(x) "\n" + +#define JE_SDT_ASM_2(x, y) \ + JE_SDT_S(x) "," JE_SDT_S(y) "\n" + +#define JE_SDT_ASM_3(x, y, z) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "\n" + +#define JE_SDT_ASM_3(x, y, z) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "\n" + +#define JE_SDT_ASM_4(x, y, z, p) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "," JE_SDT_S(p) "\n" + +#define JE_SDT_ASM_5(x, y, z, p, q) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "," JE_SDT_S(p) "," \ + JE_SDT_S(q) "\n" + +/* Arg size */ +#ifdef __LP64__ +#define JE_SDT_ASM_ADDR .8byte +#else +#define JE_SDT_ASM_ADDR .4byte +#endif + +#define JE_SDT_NOTE_NAME "stapsdt" +#define JE_SDT_NOTE_TYPE 3 + +#define JE_SDT_SEMAPHORE_NONE(provider, name) \ + JE_SDT_ASM_1(JE_SDT_ASM_ADDR 0) /* No Semaphore support */ +#define JE_SDT_SEMAPHORE_OPERAND(provider, name) \ + [__sdt_semaphore] "ip" (0) /* No Semaphore */ + +#define JE_SDT_ASM_STRING(x) JE_SDT_ASM_1(.asciz JE_SDT_S(x)) + +#define JE_SDT_NOTE(provider, name, arg_template) \ + JE_SDT_ASM_1(990: JE_SDT_NOP) \ + JE_SDT_ASM_3( .pushsection .note.stapsdt,"?","note") \ + JE_SDT_ASM_1( .balign 4) \ + JE_SDT_ASM_3( .4byte 992f-991f, 994f-993f, JE_SDT_NOTE_TYPE) \ + JE_SDT_ASM_1(991: .asciz JE_SDT_NOTE_NAME) \ + JE_SDT_ASM_1(992: .balign 4) \ + JE_SDT_ASM_1(993: JE_SDT_ASM_ADDR 990b) \ + JE_SDT_ASM_1( JE_SDT_ASM_ADDR _.stapsdt.base) \ + JE_SDT_SEMAPHORE_NONE(provider, name) \ + JE_SDT_ASM_STRING(provider) \ + JE_SDT_ASM_STRING(name) \ + JE_SDT_ASM_STRING(arg_template) \ + JE_SDT_ASM_1(994: .balign 4) \ + JE_SDT_ASM_1( .popsection) + +#define JE_SDT_BASE \ + JE_SDT_ASM_1( .ifndef _.stapsdt.base) \ + JE_SDT_ASM_5( .pushsection .stapsdt.base, "aG", "progbits", \ + .stapsdt.base,comdat) \ + 
JE_SDT_ASM_1( .weak _.stapsdt.base) \ + JE_SDT_ASM_1( .hidden _.stapsdt.base) \ + JE_SDT_ASM_1( _.stapsdt.base: .space 1) \ + JE_SDT_ASM_2( .size _.stapsdt.base, 1) \ + JE_SDT_ASM_1( .popsection) \ + JE_SDT_ASM_1( .endif) + + +/* + * Default constraint for probes arguments. + * See https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ +#ifndef JE_SDT_ARG_CONSTRAINT +#define JE_SDT_ARG_CONSTRAINT "nor" +#endif + +#define JE_SDT_ARGARRAY(x) ((__builtin_classify_type(x) == 14) || \ + (__builtin_classify_type(x) == 5)) +#define JE_SDT_ARGSIZE(x) (JE_SDT_ARGARRAY(x) ? sizeof(void*) : sizeof(x)) + +/* + * Format of each probe argument as operand. Size tagged with JE_SDT_Sn, + * with "n" constraint. Value is tagged with JE_SDT_An with configured + * constraint. + */ +#define JE_SDT_ARG(n, x) \ + [JE_SDT_S##n] "n" ((size_t)JE_SDT_ARGSIZE(x)), \ + [JE_SDT_A##n] JE_SDT_ARG_CONSTRAINT(x) + +/* Templates to append arguments as operands. */ +#define JE_SDT_OPERANDS_0() [__sdt_dummy] "g" (0) +#define JE_SDT_OPERANDS_1(_1) JE_SDT_ARG(1, _1) +#define JE_SDT_OPERANDS_2(_1, _2) JE_SDT_OPERANDS_1(_1), JE_SDT_ARG(2, _2) +#define JE_SDT_OPERANDS_3(_1, _2, _3) JE_SDT_OPERANDS_2(_1, _2), JE_SDT_ARG(3, _3) +#define JE_SDT_OPERANDS_4(_1, _2, _3, _4) \ + JE_SDT_OPERANDS_3(_1, _2, _3), JE_SDT_ARG(4, _4) +#define JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5) \ + JE_SDT_OPERANDS_4(_1, _2, _3, _4), JE_SDT_ARG(5, _5) +#define JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6) \ + JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5), JE_SDT_ARG(6, _6) +#define JE_SDT_OPERANDS_7(_1, _2, _3, _4, _5, _6, _7) \ + JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6), JE_SDT_ARG(7, _7) + +/* Templates to reference the arguments from operands. 
 */
+#define JE_SDT_ARGFMT(num) %n[JE_SDT_S##num]@%[JE_SDT_A##num]
+#define JE_SDT_ARG_TEMPLATE_0 /* No args */
+#define JE_SDT_ARG_TEMPLATE_1 JE_SDT_ARGFMT(1)
+#define JE_SDT_ARG_TEMPLATE_2 JE_SDT_ARG_TEMPLATE_1 JE_SDT_ARGFMT(2)
+#define JE_SDT_ARG_TEMPLATE_3 JE_SDT_ARG_TEMPLATE_2 JE_SDT_ARGFMT(3)
+#define JE_SDT_ARG_TEMPLATE_4 JE_SDT_ARG_TEMPLATE_3 JE_SDT_ARGFMT(4)
+#define JE_SDT_ARG_TEMPLATE_5 JE_SDT_ARG_TEMPLATE_4 JE_SDT_ARGFMT(5)
+#define JE_SDT_ARG_TEMPLATE_6 JE_SDT_ARG_TEMPLATE_5 JE_SDT_ARGFMT(6)
+#define JE_SDT_ARG_TEMPLATE_7 JE_SDT_ARG_TEMPLATE_6 JE_SDT_ARGFMT(7)
+
+#define JE_SDT_PROBE( \
+	provider, name, n, arglist) \
+	do { \
+		__asm__ __volatile__( \
+			JE_SDT_NOTE(provider, name, \
+				JE_SDT_ARG_TEMPLATE_##n) \
+			:: JE_SDT_SEMAPHORE_OPERAND(provider, name), \
+			JE_SDT_OPERANDS_##n arglist); \
+		__asm__ __volatile__(JE_SDT_BASE); \
+	} while (0)
+
+#define JE_USDT(name, N, ...) \
+	JE_SDT_PROBE(jemalloc, name, N, (__VA_ARGS__))
+
+
+#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H */
+
+/* clang-format on */
diff --git a/include/jemalloc/internal/jemalloc_probe_stap.h b/include/jemalloc/internal/jemalloc_probe_stap.h
new file mode 100644
index 00000000..302b6cbb
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_probe_stap.h
@@ -0,0 +1,11 @@
+#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H
+#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H
+
+#include <sys/sdt.h>
+
+#define JE_USDT(name, N, ...) JE_USDT_PROBE_N(name, N, ##__VA_ARGS__)
+
+#define JE_USDT_PROBE_N(name, N, ...) 
\ + STAP_PROBE##N(jemalloc, name, ##__VA_ARGS__) + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H */ From f87bbab22cf5a81dd314c7811867edc5c69025d2 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 10 Jun 2025 11:44:23 -0700 Subject: [PATCH 308/395] Add several USDT probes for hpa --- src/hpa_hooks.c | 13 +++++++++++-- src/hpdata.c | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 45bebe41..14005ae0 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_hooks.h" +#include "jemalloc/internal/jemalloc_probe.h" static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); @@ -19,16 +20,20 @@ const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, static void * hpa_hooks_map(size_t size) { bool commit = true; - return pages_map(NULL, size, HUGEPAGE, &commit); + void *ret = pages_map(NULL, size, HUGEPAGE, &commit); + JE_USDT(hpa_map, 2, size, ret); + return ret; } static void hpa_hooks_unmap(void *ptr, size_t size) { + JE_USDT(hpa_unmap, 2, size, ptr); pages_unmap(ptr, size); } static void hpa_hooks_purge(void *ptr, size_t size) { + JE_USDT(hpa_purge, 2, size, ptr); pages_purge_forced(ptr, size); } @@ -52,12 +57,14 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { if (sync) { err = pages_collapse(ptr, size); } + JE_USDT(hpa_hugify, 4, size, ptr, err, sync); return err; } static void hpa_hooks_dehugify(void *ptr, size_t size) { bool err = pages_nohuge(ptr, size); + JE_USDT(hpa_dehugify, 3, size, ptr, err); (void)err; } @@ -78,7 +85,9 @@ hpa_hooks_ms_since(nstime_t *past_nstime) { static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE - return pages_purge_process_madvise(vec, vlen, nbytes); + bool err = pages_purge_process_madvise(vec, vlen, nbytes); + JE_USDT(hpa_vectorized_purge, 
3, nbytes, vlen, err); + return err; #else return true; #endif diff --git a/src/hpdata.c b/src/hpdata.c index 9d324952..e9ee2738 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" +#include "jemalloc/internal/jemalloc_probe.h" static int hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { @@ -100,6 +101,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; + JE_USDT(hpa_reserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, + new_dirty, largest_unchosen_range); /* * If we allocated out of a range that was the longest in the hpdata, it @@ -160,6 +163,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); + JE_USDT(hpa_unreserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, + old_longest_range, new_range_len); } size_t From 4246475b44e660010256206857d941e6f45ca113 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 25 Jul 2025 10:14:28 -0700 Subject: [PATCH 309/395] [process_madvise] Make init lazy so that python tests pass. 
Reset the pidfd on fork --- include/jemalloc/internal/atomic.h | 2 ++ include/jemalloc/internal/pages.h | 1 + src/jemalloc.c | 1 + src/pages.c | 37 ++++++++++++++++++++++++------ 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index ddd9341e..f80e5640 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -89,6 +89,8 @@ JEMALLOC_GENERATE_ATOMICS(bool, b, 0) JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(int, i, LG_SIZEOF_INT) + JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 31909934..b0cc5bba 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -127,5 +127,6 @@ bool pages_boot(void); void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); +void pages_postfork_child(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 876c49e8..4adcbf3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4549,6 +4549,7 @@ jemalloc_postfork_child(void) { malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); + pages_postfork_child(); } /******************************************************************************/ diff --git a/src/pages.c b/src/pages.c index 88301c2b..54678a38 100644 --- a/src/pages.c +++ b/src/pages.c @@ -621,7 +621,7 @@ pages_dodump(void *addr, size_t size) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE # include # include -static int pidfd; +static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); static bool init_process_madvise(void) { @@ -632,11 +632,6 @@ 
init_process_madvise(void) { if (opt_process_madvise_max_batch > PROCESS_MADVISE_MAX_BATCH_LIMIT) { opt_process_madvise_max_batch = PROCESS_MADVISE_MAX_BATCH_LIMIT; } - pid_t pid = getpid(); - pidfd = syscall(SYS_pidfd_open, pid, 0); - if (pidfd == -1) { - return true; - } return false; } @@ -651,12 +646,38 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { - size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + while (pid_fd == -1) { + int newfd = syscall(SYS_pidfd_open, getpid(), 0); + if (newfd == -1) { + return true; + } + if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, + &pid_fd, newfd, + ATOMIC_SEQ_CST, + ATOMIC_SEQ_CST)) { + /* Someone else set the fd, so we close ours */ + assert(pid_fd != -1); + close(newfd); + } else { + pid_fd = newfd; + } + } + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); return purged_bytes != total_bytes; } +void pages_postfork_child(void) { + /* Reset the file descriptor we inherited from parent process */ + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + if (pid_fd != -1) { + atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_SEQ_CST); + close(pid_fd); + } +} + #else static bool @@ -671,6 +692,8 @@ pages_purge_process_madvise_impl( return true; } +void pages_postfork_child(void) {} + #endif bool From 395e63bf7e79b9faf7187add17ee6b0571857a60 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 11:25:10 -0700 Subject: [PATCH 310/395] Fix several spelling errors in comments --- src/hpa.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 03668f06..4c0f4e36 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -240,7 +240,7 @@ hpa_shard_init(hpa_shard_t *shard, 
hpa_central_t *central, emap_t *emap, /* * Note that the stats functions here follow the usual stats naming conventions; * "merge" obtains the stats from some live object of instance, while "accum" - * only combines the stats from one stats objet to another. Hence the lack of + * only combines the stats from one stats object to another. Hence the lack of * locking here. */ static void @@ -368,7 +368,7 @@ hpa_update_purge_hugify_eligibility( * could lead to situations where a hugepage that spends most of its * time meeting the criteria never quite getting hugified if there are * intervening deallocations). The idea is that the hugification delay - * will allow them to get purged, reseting their "hugify-allowed" bit. + * will allow them to get purged, resetting their "hugify-allowed" bit. * If they don't get purged, then the hugification isn't hurting and * might help. As an exception, we don't hugify hugepages that are now * empty; it definitely doesn't help there until the hugepage gets @@ -642,11 +642,11 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { shard->stats.nhugifies++; if (err) { /* - * When asynchronious hugification is used + * When asynchronous hugification is used * (shard->opts.hugify_sync option is false), we are not * expecting to get here, unless something went terrible wrong. * Because underlying syscall is only setting kernel flag for - * memory range (actual hugification happens asynchroniously + * memory range (actual hugification happens asynchronously * and we are not getting any feedback about its outcome), we * expect syscall to be successful all the time. */ @@ -706,7 +706,7 @@ hpa_shard_maybe_do_deferred_work( * When experimental_max_purge_nhp option is used, there is no * guarantee we'll always respect dirty_mult option. 
Option * experimental_max_purge_nhp provides a way to configure same - * behaviour as was possible before, with buggy implementation + * behavior as was possible before, with buggy implementation * of purging algorithm. */ ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; From a156e997d7037aba2b2dc09993a62798966c991e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 14:43:03 -0700 Subject: [PATCH 311/395] Do not dehugify when purging Giving the advice MADV_DONTNEED to a range of virtual memory backed by a transparent huge page already causes that range of virtual memory to become backed by regular pages. --- include/jemalloc/internal/hpa_hooks.h | 1 - src/hpa.c | 9 +------- src/hpa_hooks.c | 12 ++-------- test/unit/hpa.c | 23 ------------------- test/unit/hpa_vectorized_madvise.c | 11 --------- .../unit/hpa_vectorized_madvise_large_batch.c | 1 - 6 files changed, 3 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index f50ff58f..5e68e349 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -10,7 +10,6 @@ struct hpa_hooks_s { void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); bool (*hugify)(void *ptr, size_t size, bool sync); - void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); diff --git a/src/hpa.c b/src/hpa.c index 4c0f4e36..e297e411 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -432,18 +432,11 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for (size_t i = 0; i < batch_sz; ++i) { - hpdata_t *to_purge = batch[i].hp; - - /* Actually do the purging, now that the lock is dropped. 
*/ - if (batch[i].dehugify) { - shard->central->hooks.dehugify( - hpdata_addr_get(to_purge), HUGEPAGE); - } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 14005ae0..e40d30ec 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,14 +8,13 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); -static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, - &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, - &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_curtime, + &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -61,13 +60,6 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { return err; } -static void -hpa_hooks_dehugify(void *ptr, size_t size) { - bool err = pages_nohuge(ptr, size); - JE_USDT(hpa_dehugify, 3, size, ptr, err); - (void)err; -} - static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { if (first_reading) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 1fed8a80..d62ac762 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -389,12 +389,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls 
= 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -414,7 +408,6 @@ TEST_BEGIN(test_defer_time) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -453,10 +446,8 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); ndefer_hugify_calls = 0; - ndefer_dehugify_calls = 0; ndefer_purge_calls = 0; /* @@ -477,7 +468,6 @@ TEST_BEGIN(test_defer_time) { nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); - expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); ndefer_hugify_calls = 0; @@ -524,7 +514,6 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -551,7 +540,6 @@ TEST_BEGIN(test_no_min_purge_interval) { * we have dirty pages. 
*/ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -567,7 +555,6 @@ TEST_BEGIN(test_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -593,7 +580,6 @@ TEST_BEGIN(test_min_purge_interval) { * opt.min_purge_interval_ms didn't pass yet. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); /* Minumum purge interval is set to 5 seconds in options. */ @@ -602,7 +588,6 @@ TEST_BEGIN(test_min_purge_interval) { /* Now we should purge, but nothing else. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -618,7 +603,6 @@ TEST_BEGIN(test_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -648,7 +632,6 @@ TEST_BEGIN(test_purge) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still * have 5 active hugepages (1 / 5 = 0.2 < 0.25). 
@@ -665,7 +648,6 @@ TEST_BEGIN(test_purge) { */ expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below * opt.dirty_mult. @@ -685,7 +667,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -716,7 +697,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only one purge call, because opts.experimental_max_purge_nhp * is set to 1. @@ -729,7 +709,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -738,7 +717,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* Finally, we are below the limit, no purges are expected. 
*/ expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); @@ -754,7 +732,6 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 8df54d06..c2aa3b58 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -123,12 +123,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls = 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -148,7 +142,6 @@ TEST_BEGIN(test_vectorized_failure_fallback) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge_fail; @@ -188,7 +181,6 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -231,7 +223,6 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We purge from 2 huge pages, each one 3 dirty continous segments. 
* For opt_process_madvise_max_batch = 2, that is @@ -259,7 +250,6 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -296,7 +286,6 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We have page batch size = 1. * we have 5 * HP active pages, 3 * HP dirty pages diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index a5766620..c974500c 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -140,7 +140,6 @@ TEST_BEGIN(test_vectorized_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; From 9528a2e2dd37154475b8a36186e62f32de17cf58 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 13:13:27 -0700 Subject: [PATCH 312/395] Use relaxed atomics to access the process madvise pid fd Relaxed atomics already provide sequentially consistent access to single location data structures. 
--- src/pages.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index 54678a38..076091e3 100644 --- a/src/pages.c +++ b/src/pages.c @@ -646,7 +646,7 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); while (pid_fd == -1) { int newfd = syscall(SYS_pidfd_open, getpid(), 0); if (newfd == -1) { @@ -654,8 +654,8 @@ pages_purge_process_madvise_impl( } if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, &pid_fd, newfd, - ATOMIC_SEQ_CST, - ATOMIC_SEQ_CST)) { + ATOMIC_RELAXED, + ATOMIC_RELAXED)) { /* Someone else set the fd, so we close ours */ assert(pid_fd != -1); close(newfd); @@ -671,9 +671,9 @@ pages_purge_process_madvise_impl( void pages_postfork_child(void) { /* Reset the file descriptor we inherited from parent process */ - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); if (pid_fd != -1) { - atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_SEQ_CST); + atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_RELAXED); close(pid_fd); } } From d73de95f722247a56b5266a27267cd24668081e9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 4 Jun 2025 09:48:14 -0700 Subject: [PATCH 313/395] Experimental configuration option for fast path prefetch from cache_bin --- configure.ac | 30 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 5 ++++ .../internal/jemalloc_internal_inlines_c.h | 6 ++++ 3 files changed, 41 insertions(+) diff --git a/configure.ac b/configure.ac index c703a6d1..d9153feb 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,6 +1434,36 @@ if test "x$enable_experimental_smallocx" = "x1" ; then fi AC_SUBST([enable_experimental_smallocx]) +dnl Do not enable fastpath prefetch by 
default. +AC_ARG_ENABLE([experimental_fp_prefetch], + [AS_HELP_STRING([--enable-experimental-fp-prefetch], [Enable experimental fastpath prefetch])], +[if test "x$enable_experimental_fp_prefetch" = "xno" ; then +enable_experimental_fp_prefetch="0" +else + dnl Check if we have __builtin_prefetch. + JE_CFLAGS_SAVE() + JE_CFLAGS_ADD([-Werror=implicit-function-declaration]) + JE_COMPILABLE([builtin prefetch], [], [ +void foo(void *p) { __builtin_prefetch(p, 1, 3); } + ], + [je_cv_have_builtin_prefetch]) + + if test "x${je_cv_have_builtin_prefetch}" = "xyes" ; then + enable_experimental_fp_prefetch="1" + else + enable_experimental_fp_prefetch="0" + AC_MSG_ERROR([--enable--experimental-fp-prefetch can only be used when builtin_preftech is available]) + fi + JE_CFLAGS_RESTORE() +fi +], +[enable_experimental_fp_prefetch="0"] +) +if test "x$enable_experimental_fp_prefetch" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ]) +fi +AC_SUBST([enable_experimental_fp_prefetch]) + dnl Do not enable profiling by default. AC_ARG_ENABLE([prof], [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 31ae2e8e..3a945ba1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -160,6 +160,11 @@ /* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +/* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables prefetch + * on malloc fast path. + */ +#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH + /* JEMALLOC_PROF enables allocation profiling. 
*/ #undef JEMALLOC_PROF diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 2c61f8c4..16f86ad4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -374,6 +374,12 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { */ ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { +#if defined(JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH) + cache_bin_sz_t lb = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + if(likely(lb != bin->low_bits_empty)) { + util_prefetch_write_range(*(bin->stack_head), usize); + } +#endif fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } From e4fa33148a4e93275dac0f306d8759c89597d55f Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:30:00 -0700 Subject: [PATCH 314/395] Remove an unused function and global variable When the dehugify functionality was retired in an previous commit, a dehugify-related function and global variable in a test was accidentally left in-place causing builds that add -Werror to CFLAGS to fail. 
--- test/unit/hpa_vectorized_madvise_large_batch.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index c974500c..e1393225 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -113,12 +113,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls = 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { From 5e98585b37556cdb762e36f02b657742b8c47fe3 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 13 Aug 2025 17:59:36 -0700 Subject: [PATCH 315/395] Save and restore errno when calling process_madvise --- src/pages.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 076091e3..78f3a1b7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -622,6 +622,7 @@ pages_dodump(void *addr, size_t size) { # include # include static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); +static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); static bool init_process_madvise(void) { @@ -646,9 +647,12 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { + if (!atomic_load_b(&process_madvise_gate, ATOMIC_RELAXED)) { + return true; + } int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); while (pid_fd == -1) { - int newfd = syscall(SYS_pidfd_open, getpid(), 0); + int newfd = (int) syscall(SYS_pidfd_open, getpid(), 0); if (newfd == -1) { return true; } @@ -663,8 +667,22 @@ pages_purge_process_madvise_impl( pid_fd = newfd; } } + + /* + * TODO: remove this save/restore of errno after supporting errno + * preservation for free() call properly. 
+ */ + int saved_errno = get_errno(); size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + if (purged_bytes == (size_t) -1) { + if (errno == EPERM || errno == EINVAL || errno == ENOSYS) { + /* Process madvise not supported the way we need it. */ + atomic_store_b(&process_madvise_gate, false, + ATOMIC_RELAXED); + } + set_errno(saved_errno); + } return purged_bytes != total_bytes; } From ced8b3cffb650af8b7bef7f6995b9032b55aeb0b Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:16:33 -0700 Subject: [PATCH 316/395] Fix the compilation check for process madvise An include of unistd.h is needed to make the declaration of the syscall function visible to the compiler. The include of sys/mman.h is not used at all. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d9153feb..ce5c8adc 100644 --- a/configure.ac +++ b/configure.ac @@ -2633,8 +2633,8 @@ if test "x${je_cv_madvise}" = "xyes" ; then dnl Check for process_madvise JE_COMPILABLE([process_madvise(2)], [ -#include #include +#include ], [ syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); ], [je_cv_process_madvise]) From 2114349a4e9933ebff87df01572a94a12eca5d86 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 15 Jul 2025 15:44:14 -0700 Subject: [PATCH 317/395] Revert PR #2608: Manually revert commits 70c94d..f9c0b5 Closes: #2707 --- Makefile.in | 3 - include/jemalloc/internal/arena_inlines_b.h | 154 +--------- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/batcher.h | 46 --- include/jemalloc/internal/bin.h | 74 +---- include/jemalloc/internal/bin_info.h | 11 - include/jemalloc/internal/bin_stats.h | 5 - include/jemalloc/internal/witness.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- 
.../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/arena.c | 89 ++---- src/batcher.c | 98 ------- src/bin.c | 48 +--- src/bin_info.c | 24 -- src/ctl.c | 37 --- src/jemalloc.c | 14 - src/stats.c | 58 +--- src/tcache.c | 194 +++---------- test/analyze/sizes.c | 2 - test/include/test/fork.h | 34 --- test/unit/batcher.c | 243 ---------------- test/unit/bin_batching.c | 270 ------------------ test/unit/bin_batching.sh | 10 - test/unit/fork.c | 37 ++- 30 files changed, 124 insertions(+), 1364 deletions(-) delete mode 100644 include/jemalloc/internal/batcher.h delete mode 100644 src/batcher.c delete mode 100644 test/include/test/fork.h delete mode 100644 test/unit/batcher.c delete mode 100644 test/unit/bin_batching.c delete mode 100644 test/unit/bin_batching.sh diff --git a/Makefile.in b/Makefile.in index 2519ed83..4e9d0bea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,7 +98,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ - $(srcroot)src/batcher.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ @@ -208,8 +207,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ - $(srcroot)test/unit/batcher.c \ - $(srcroot)test/unit/bin_batching.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 549dfb8a..6276deaa 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -588,11 +588,10 @@ arena_dalloc_bin_locked_begin( * stats 
updates, which happen during finish (this lets running counts get left * in a register). */ -JEMALLOC_ALWAYS_INLINE void +JEMALLOC_ALWAYS_INLINE bool arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, - unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { + void *ptr) { const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(info, binind, slab, ptr); slab_data_t *slab_data = edata_slab_data_get(slab); @@ -612,17 +611,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (nfree == bin_info->nregs) { arena_dalloc_bin_locked_handle_newly_empty( tsdn, arena, slab, bin); - - if (*dalloc_slabs_count < ndalloc_slabs) { - dalloc_slabs[*dalloc_slabs_count] = slab; - (*dalloc_slabs_count)++; - } else { - edata_list_active_append(dalloc_slabs_extra, slab); - } + return true; } else if (nfree == 1 && slab != bin->slabcur) { arena_dalloc_bin_locked_handle_newly_nonempty( tsdn, arena, slab, bin); } + return false; } JEMALLOC_ALWAYS_INLINE void @@ -635,148 +629,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind, - edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count, - edata_list_active_t *dalloc_slabs_extra) { - assert(binind < bin_info_nbatched_sizes); - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - size_t nelems_to_pop = batcher_pop_begin( - tsdn, &batched_bin->remote_frees); - - bin_batching_test_mid_pop(nelems_to_pop); - if (nelems_to_pop == BATCHER_NO_IDX) { - malloc_mutex_assert_not_owner( - tsdn, &batched_bin->remote_frees.mtx); - return; - } else { - malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx); - } - - 
size_t npushes = batcher_pop_get_pushes( - tsdn, &batched_bin->remote_frees); - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; - for (size_t i = 0; i < nelems_to_pop; i++) { - remote_free_data[i] = batched_bin->remote_free_data[i]; - } - batcher_pop_end(tsdn, &batched_bin->remote_frees); - - for (size_t i = 0; i < nelems_to_pop; i++) { - arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info, - binind, remote_free_data[i].slab, remote_free_data[i].ptr, - dalloc_slabs, ndalloc_slabs, dalloc_count, - dalloc_slabs_extra); - } - - bin->stats.batch_pops++; - bin->stats.batch_pushes += npushes; - bin->stats.batch_pushed_elems += nelems_to_pop; -} - -typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; -struct arena_bin_flush_batch_state_s { - arena_dalloc_bin_locked_info_t info; - - /* - * Bin batching is subtle in that there are unusual edge cases in which - * it can trigger the deallocation of more slabs than there were items - * flushed (say, if every original deallocation triggered a slab - * deallocation, and so did every batched one). So we keep a small - * backup array for any "extra" slabs, as well as a a list to allow a - * dynamic number of ones exceeding that array. 
- */ - edata_t *dalloc_slabs[8]; - unsigned dalloc_slab_count; - edata_list_active_t dalloc_slabs_extra; -}; - -JEMALLOC_ALWAYS_INLINE unsigned -arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) { - if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) { - return bin_batching_test_ndalloc_slabs_max; - } - return preallocated_slabs; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_begin(&state->info, binind); - state->dalloc_slab_count = 0; - edata_list_active_init(&state->dalloc_slabs_extra); - - unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs) - / sizeof(state->dalloc_slabs[0])); - unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); - - arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, - state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count, - &state->dalloc_slabs_extra); -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info); -} - -static inline bool -arena_bin_has_batch(szind_t binind) { - return binind < bin_info_nbatched_sizes; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (!arena_bin_has_batch(binind)) { - return; - } - /* - * The initialization of dalloc_slabs_extra is guarded by an - * arena_bin_has_batch check higher up the stack. But the clang - * analyzer forgets this down the stack, triggering a spurious error - * reported here. 
- */ - JEMALLOC_CLANG_ANALYZER_SUPPRESS { - bin_batching_test_after_unlock(state->dalloc_slab_count, - edata_list_active_empty(&state->dalloc_slabs_extra)); - } - for (unsigned i = 0; i < state->dalloc_slab_count; i++) { - edata_t *slab = state->dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - while (!edata_list_active_empty(&state->dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first( - &state->dalloc_slabs_extra); - edata_list_active_remove(&state->dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } -} - static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - bin_t *ret; - if (arena_bin_has_batch(binind)) { - ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard); - } else { - ret = shard0 + binshard; - } - assert(binind >= SC_NBINS - 1 - || (uintptr_t)ret - < (uintptr_t)arena + arena_bin_offsets[binind + 1]); - - return ret; + return shard0 + binshard; } #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 4778ca1b..471f7692 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -105,7 +105,7 @@ struct arena_s { "Do not use this field directly. 
" "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_with_batch_t all_bins[0]; + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h deleted file mode 100644 index 3ceb8256..00000000 --- a/include/jemalloc/internal/batcher.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BATCHER_H -#define JEMALLOC_INTERNAL_BATCHER_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex.h" - -#define BATCHER_NO_IDX ((size_t) - 1) - -typedef struct batcher_s batcher_t; -struct batcher_s { - /* - * Optimize for locality -- nelems_max and nelems are always touched - * togehter, along with the front of the mutex. The end of the mutex is - * only touched if there's contention. - */ - atomic_zu_t nelems; - size_t nelems_max; - size_t npushes; - malloc_mutex_t mtx; -}; - -void batcher_init(batcher_t *batcher, size_t nelems_max); - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. If the former, the caller must call - * batcher_push_end once done. - */ -size_t batcher_push_begin( - tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push); -void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); - -/* - * Returns the number of items to pop, or BATCHER_NO_IDX if there are none. - * If the former, must be followed by a call to batcher_pop_end. 
- */ -size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); -size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); - -void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher); - -#endif /* JEMALLOC_INTERNAL_BATCHER_H */ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index e91583d7..05a2f845 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,60 +2,12 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/batcher.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" -#define BIN_REMOTE_FREE_ELEMS_MAX 16 - -#ifdef JEMALLOC_JET -extern void (*bin_batching_test_after_push_hook)(size_t idx); -extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); -extern void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - -#ifdef JEMALLOC_JET -extern unsigned bin_batching_test_ndalloc_slabs_max; -#else -static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -#endif - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_push(size_t idx) { - (void)idx; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_push_hook != NULL) { - bin_batching_test_after_push_hook(idx); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_mid_pop(size_t elems_to_pop) { - (void)elems_to_pop; -#ifdef JEMALLOC_JET - if (bin_batching_test_mid_pop_hook != NULL) { - bin_batching_test_mid_pop_hook(elems_to_pop); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { - (void)slab_dalloc_count; - 
(void)list_empty; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_unlock_hook != NULL) { - bin_batching_test_after_unlock_hook( - slab_dalloc_count, list_empty); - } -#endif -} - /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -90,19 +42,6 @@ struct bin_s { edata_list_active_t slabs_full; }; -typedef struct bin_remote_free_data_s bin_remote_free_data_t; -struct bin_remote_free_data_s { - void *ptr; - edata_t *slab; -}; - -typedef struct bin_with_batch_s bin_with_batch_t; -struct bin_with_batch_s { - bin_t bin; - batcher_t remote_frees; - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; -}; - /* A set of sharded bins of the same size class. */ typedef struct bins_s bins_t; struct bins_s { @@ -115,12 +54,12 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); /* Initializes a bin to empty. Returns true on error. */ -bool bin_init(bin_t *bin, unsigned binind); +bool bin_init(bin_t *bin); /* Forking. */ -void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. 
*/ static inline void @@ -138,11 +77,6 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; - - stats->batch_failed_pushes += bin->stats.batch_failed_pushes; - stats->batch_pushes += bin->stats.batch_pushes; - stats->batch_pushed_elems += bin->stats.batch_pushed_elems; - malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 0022c3f7..8c563dee 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -44,17 +44,6 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; -/* The maximum size a size class can be and still get batching behavior. */ -extern size_t opt_bin_info_max_batched_size; -/* The number of batches per batched size class. */ -extern size_t opt_bin_info_remote_free_max_batch; -// The max number of pending elems (across all batches) -extern size_t opt_bin_info_remote_free_max; - -extern szind_t bin_info_nbatched_sizes; -extern unsigned bin_info_nbatched_bins; -extern unsigned bin_info_nunbatched_bins; - extern bin_info_t bin_infos[SC_NBINS]; void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index e1095f38..9900e0d1 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,11 +48,6 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. 
*/ size_t nonfull_slabs; - - uint64_t batch_pops; - uint64_t batch_failed_pushes; - uint64_t batch_pushes; - uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 73770713..7ca3c347 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -64,10 +64,9 @@ enum witness_rank_e { WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, WITNESS_RANK_HOOK, - WITNESS_RANK_BIN, WITNESS_RANK_LEAF = 0x1000, - WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF, + WITNESS_RANK_BIN = WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..9743e10b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -380,4 +379,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..c1ff11a9 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..6cb1b35e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..5c7b00a2 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/src/arena.c b/src/arena.c index 2f58b038..962a325d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -39,7 +39,8 @@ div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; -uint32_t 
arena_bin_offsets[SC_NBINS]; +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; /* * a0 is used to handle huge requests before malloc init completes. After @@ -674,17 +675,11 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { } static void -arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX); - } - if (bin->slabcur != NULL) { slab = bin->slabcur; bin->slabcur = NULL; @@ -835,8 +830,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset( - tsd, arena, arena_get_bin(arena, i, j), i); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -1103,19 +1097,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); - /* - * This has some fields that are conditionally initialized down batch - * flush pathways. This can trigger static analysis warnings deeper - * down in the static. The accesses are guarded by the same checks as - * the initialization, but the analysis isn't able to track that across - * multiple stack frames. - */ - arena_bin_flush_batch_state_t batch_flush_state - JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); - arena_bin_flush_batch_after_lock( - tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill_min) { /* Try batch-fill from slabcur first. 
*/ @@ -1176,11 +1159,7 @@ label_refill: cache_bin->tstats.nrequests = 0; } - arena_bin_flush_batch_before_unlock( - tsdn, arena, bin, binind, &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); - arena_bin_flush_batch_after_unlock( - tsdn, arena, bin, binind, &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); @@ -1474,16 +1453,12 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; arena_dalloc_bin_locked_begin(&info, binind); - edata_t *dalloc_slabs[1]; - unsigned dalloc_slabs_count = 0; - arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata, - ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count, - /* dalloc_slabs_extra */ NULL); + bool ret = arena_dalloc_bin_locked_step( + tsdn, arena, bin, &info, binind, edata, ptr); arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); - if (dalloc_slabs_count != 0) { - assert(dalloc_slabs[0] == edata); + if (ret) { arena_slab_dalloc(tsdn, arena, edata); } } @@ -1722,6 +1697,7 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; + unsigned i; if (ind == 0) { base = b0get(); @@ -1734,13 +1710,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) - + sizeof(bin_with_batch_t) * bin_info_nbatched_bins - + sizeof(bin_t) * bin_info_nunbatched_bins; + + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } - + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t) + <= (uintptr_t)arena + arena_size);) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); arena->last_thd = NULL; @@ -1779,13 +1756,11 @@ 
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. */ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin, i); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bool err = bin_init(&arena->all_bins[i]);) + if (err) { + goto label_error; } } @@ -1943,10 +1918,8 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; - uint32_t bin_sz = (i < bin_info_nbatched_sizes - ? sizeof(bin_with_batch_t) - : sizeof(bin_t)); - cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); } return pa_central_init( &arena_pa_central_global, base, hpa, &hpa_hooks_default); @@ -1996,21 +1969,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_prefork(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_prefork(tsdn, &arena->all_bins[i]);) } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_parent(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -2047,11 
+2016,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_child(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_child(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/batcher.c b/src/batcher.c deleted file mode 100644 index af71dae5..00000000 --- a/src/batcher.c +++ /dev/null @@ -1,98 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include "jemalloc/internal/batcher.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/atomic.h" - -void -batcher_init(batcher_t *batcher, size_t nelems_max) { - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - batcher->nelems_max = nelems_max; - batcher->npushes = 0; - malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, - malloc_mutex_rank_exclusive); -} - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. - */ -size_t -batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) { - assert(elems_to_push > 0); - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems_guess + elems_to_push > batcher->nelems_max) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems + elems_to_push > batcher->nelems_max) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - assert(elems_to_push <= batcher->nelems_max - nelems); - /* - * We update nelems at push time (instead of during pop) so that other - * racing accesses of the batcher can fail fast instead of trying to - * acquire a mutex only to discover that there's no space for them. 
- */ - atomic_store_zu( - &batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); - batcher->npushes++; - return nelems; -} - -size_t -batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - size_t npushes = batcher->npushes; - batcher->npushes = 0; - return npushes; -} - -void -batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -size_t -batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems_guess <= batcher->nelems_max); - if (nelems_guess == 0) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems <= batcher->nelems_max); - if (nelems == 0) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - return nelems; -} - -void -batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -void -batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_prefork(tsdn, &batcher->mtx); -} - -void -batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_parent(tsdn, &batcher->mtx); -} - -void -batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_child(tsdn, &batcher->mtx); -} diff --git a/src/bin.c b/src/bin.c index 98d1da02..a11b108e 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,14 +6,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -#ifdef JEMALLOC_JET -unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -void (*bin_batching_test_after_push_hook)(size_t push_idx); -void 
(*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); -void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -47,7 +39,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } bool -bin_init(bin_t *bin, unsigned binind) { +bin_init(bin_t *bin) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; @@ -58,52 +50,20 @@ bin_init(bin_t *bin, unsigned binind) { if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, opt_bin_info_remote_free_max); - } return false; } void -bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_prefork(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_prefork(tsdn, &bin->lock); - if (has_batch) { - /* - * The batch mutex has lower rank than the bin mutex (as it must - * -- it's acquired later). But during forking, we go - * bin-at-a-time, so that we acquire mutex on bin 0, then on - * the bin 0 batcher, then on bin 1. This is a safe ordering - * (it's ordered by the index of arenas and bins within those - * arenas), but will trigger witness errors that would - * otherwise force another level of arena forking that breaks - * bin encapsulation (because the witness API doesn't "know" - * about arena or bin ordering -- it just sees that the batcher - * has a lower rank than the bin). So instead we exclude the - * batcher mutex from witness checking during fork (which is - * the only time we touch multiple bins at once) by passing - * TSDN_NULL. 
- */ - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_prefork(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_parent(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_parent(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_child(TSDN_NULL, &batched->remote_frees); - } } diff --git a/src/bin_info.c b/src/bin_info.c index de93418a..e10042fd 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,26 +3,8 @@ #include "jemalloc/internal/bin_info.h" -/* - * We leave bin-batching disabled by default, with other settings chosen mostly - * empirically; across the test programs I looked at they provided the most bang - * for the buck. With other default settings, these choices for bin batching - * result in them consuming far less memory (even in the worst case) than the - * tcaches themselves, the arena, etc. - * Note that we always try to pop all bins on every arena cache bin lock - * operation, so the typical memory waste is far less than this (and only on - * hot bins, which tend to be large anyways). - */ -size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. 
*/ -size_t opt_bin_info_remote_free_max_batch = 4; -size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; - bin_info_t bin_infos[SC_NBINS]; -szind_t bin_info_nbatched_sizes; -unsigned bin_info_nbatched_bins; -unsigned bin_info_nunbatched_bins; - static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { @@ -38,12 +20,6 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; - if (bin_info->reg_size <= opt_bin_info_max_batched_size) { - bin_info_nbatched_sizes++; - bin_info_nbatched_bins += bin_info->n_shards; - } else { - bin_info_nunbatched_bins += bin_info->n_shards; - } } } diff --git a/src/ctl.c b/src/ctl.c index 9e9a4b43..a4c60ce0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -134,9 +134,6 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_experimental_tcache_gc) -CTL_PROTO(opt_max_batched_size) -CTL_PROTO(opt_remote_free_max) -CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -248,10 +245,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) -CTL_PROTO(stats_arenas_i_bins_j_batch_pops) -CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -501,9 +494,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, 
{NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)}, - {NAME("max_batched_size"), CTL(opt_max_batched_size)}, - {NAME("remote_free_max"), CTL(opt_remote_free_max)}, - {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, @@ -673,11 +663,6 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, - {NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)}, - {NAME("batch_failed_pushes"), - CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, - {NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)}, - {NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}}; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { @@ -1219,14 +1204,6 @@ ctl_arena_stats_sdmerge( assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } - - merged->batch_pops += bstats->batch_pops; - merged->batch_failed_pushes += - bstats->batch_failed_pushes; - merged->batch_pushes += bstats->batch_pushes; - merged->batch_pushed_elems += - bstats->batch_pushed_elems; - malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -2202,10 +2179,6 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) -CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) -CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) -CTL_RO_NL_GEN( - opt_remote_free_max_batch, 
opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN( @@ -3982,16 +3955,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, - uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, - uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index( diff --git a/src/jemalloc.c b/src/jemalloc.c index 4adcbf3c..9f59a781 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1391,20 +1391,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size, - "max_batched_size", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, - "remote_free_max_batch", 0, - BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, - "remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( v, vlen); diff --git a/src/stats.c b/src/stats.c index 
84af3911..a8a574ac 100644 --- a/src/stats.c +++ b/src/stats.c @@ -357,15 +357,6 @@ stats_arena_bins_print( COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, pops, NULL, right, 10, uint64) - COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, failed_push, NULL, right, 13, uint64) - COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push, NULL, right, 7, uint64) - COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push_elem, NULL, right, 12, uint64) - COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) - /* Don't want to actually print the name. */ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -406,15 +397,13 @@ stats_arena_bins_print( } for (j = 0, in_gap = false; j < nbins; j++) { - uint64_t nslabs; - size_t reg_size, slab_size, curregs; - size_t curslabs; - size_t nonfull_slabs; - uint32_t nregs, nshards; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nreslabs; - uint64_t batch_pops, batch_failed_pushes, batch_pushes, - batch_pushed_elems; + uint64_t nslabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + size_t nonfull_slabs; + uint32_t nregs, nshards; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreslabs; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -463,15 +452,6 @@ stats_arena_bins_print( CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); - CTL_LEAF( - stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", - &batch_failed_pushes, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes, - uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", - &batch_pushed_elems, uint64_t); - if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -506,14 +486,6 @@ stats_arena_bins_print( emitter, "curslabs", 
emitter_type_size, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); - emitter_json_kv( - emitter, "batch_pops", emitter_type_uint64, &batch_pops); - emitter_json_kv(emitter, "batch_failed_pushes", - emitter_type_uint64, &batch_failed_pushes); - emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64, - &batch_pushes); - emitter_json_kv(emitter, "batch_pushed_elems", - emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit( @@ -573,19 +545,6 @@ stats_arena_bins_print( col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); - col_pops.uint64_val = batch_pops; - col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime); - - col_failed_push.uint64_val = batch_failed_pushes; - col_failed_push_ps.uint64_val = rate_per_second( - batch_failed_pushes, uptime); - col_push.uint64_val = batch_pushes; - col_push_ps.uint64_val = rate_per_second(batch_pushes, uptime); - - col_push_elem.uint64_val = batch_pushed_elems; - col_push_elem_ps.uint64_val = rate_per_second( - batch_pushed_elems, uptime); - /* * Note that mutex columns were initialized above, if mutex == * true. @@ -1677,9 +1636,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_BOOL("experimental_tcache_gc") - OPT_WRITE_SIZE_T("max_batched_size") - OPT_WRITE_SIZE_T("remote_free_max") - OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/src/tcache.c b/src/tcache.c index 44a96841..2d73237b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -608,7 +608,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, /* nfill_min */ - opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, + opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); @@ -680,8 +680,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned tcache_binshard = - tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -698,25 +696,12 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - /* - * There's an edge case where we need to deallocate more slabs than we - * have elements of dalloc_slabs. This can if we end up deallocating - * items batched by another thread in addition to ones flushed from the - * cache. Since this is not very likely (most small object - * deallocations don't free up a whole slab), we don't want to burn the - * stack space to keep those excess slabs in an array. Instead we'll - * maintain an overflow list. - */ - edata_list_active_t dalloc_slabs_extra; - edata_list_active_init(&dalloc_slabs_extra); - /* * We're about to grab a bunch of locks. If one of them happens to be * the one guarding the arena-level stats counters we flush our * thread-local ones to, we do so under one critical section. */ bool merged_stats = false; - /* * We maintain the invariant that all edatas yet to be flushed are * contained in the half-open range [flush_start, flush_end). We'll @@ -741,7 +726,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned cur_binshard = edata_binshard_get(cur_edata); bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); - /* * Start off the partition; item_edata[i] always matches itself * of course. 
@@ -788,150 +772,43 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, } } - /* - * We never batch when flushing to our home-base bin shard, - * since it's likely that we'll have to acquire that lock anyway - * when flushing stats. - * - * A plausible check we could add to can_batch is - * '&& arena_is_auto(cur_arena)'. The motivation would be that - * we have a higher tolerance for dubious user assumptions - * around non-auto arenas (e.g. "if I deallocate every object I - * allocated, and then call tcache.flush, then the arena stats - * must reflect zero live allocations"). - * - * This is dubious for a couple reasons: - * - We already don't provide perfect fidelity for stats - * counting (e.g. for profiled allocations, whose size can - * inflate in stats). - * - Hanging load-bearing guarantees around stats impedes - * scalability in general. - * - * There are some "complete" strategies we could do instead: - * - Add a arena..quiesce call to pop all bins for users who - * do want those stats accounted for. - * - Make batchability a user-controllable per-arena option. - * - Do a batch pop after every mutex acquisition for which we - * want to provide accurate stats. This gives perfectly - * accurate stats, but can cause weird performance effects - * (because doing stats collection can now result in slabs - * becoming empty, and therefore purging, large mutex - * acquisition, etc.). - * - Propagate the "why" behind a flush down to the level of the - * batcher, and include a batch pop attempt down full tcache - * flushing pathways. This is just a lot of plumbing and - * internal complexity. - * - * We don't do any of these right now, but the decision calculus - * and tradeoffs are subtle enough that the reasoning was worth - * leaving in this comment. 
- */ - bool bin_is_batched = arena_bin_has_batch(binind); - bool home_binshard = (cur_arena == tcache_arena - && cur_binshard == tcache_binshard); - bool can_batch = (flush_start - prev_flush_start - <= opt_bin_info_remote_free_max_batch) - && !home_binshard && bin_is_batched; + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); /* - * We try to avoid the batching pathway if we can, so we always - * at least *try* to lock. + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). */ - bool locked = false; - bool batched = false; - bool batch_failed = false; - if (can_batch) { - locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } - if (can_batch && !locked) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *) - cur_bin; - size_t push_idx = batcher_push_begin(tsdn, - &batched_bin->remote_frees, - flush_start - prev_flush_start); - bin_batching_test_after_push(push_idx); - if (push_idx != BATCHER_NO_IDX) { - batched = true; - unsigned nbatched = flush_start - - prev_flush_start; - for (unsigned i = 0; i < nbatched; i++) { - unsigned src_ind = prev_flush_start + i; - batched_bin - ->remote_free_data[push_idx + i] - .ptr = ptrs->ptr[src_ind]; - batched_bin - ->remote_free_data[push_idx + i] - .slab = item_edata[src_ind].edata; - } - batcher_push_end( - tsdn, &batched_bin->remote_frees); - } else { - batch_failed = true; + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; } } - if (!batched) { - if (!locked) { - malloc_mutex_lock(tsdn, &cur_bin->lock); - } - /* - * Unlike other stats (which only ever get flushed into - * a tcache's associated arena), batch_failed counts get - * accumulated into the bin where the push attempt - * failed. - */ - if (config_stats && batch_failed) { - cur_bin->stats.batch_failed_pushes++; - } - /* - * Flush stats first, if that was the right lock. Note - * that we don't actually have to flush stats into the - * current thread's binshard. Flushing into any binshard - * in the same arena is enough; we don't expose stats on - * per-binshard basis (just per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - unsigned preallocated_slabs = nflush; - unsigned ndalloc_slabs = - arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); - /* Next flush objects our own objects. */ - /* Init only to avoid used-uninitialized warning. 
*/ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr, dalloc_slabs, ndalloc_slabs, - &dalloc_count, &dalloc_slabs_extra); - } - /* - * Lastly, flush any batched objects (from other - * threads). - */ - if (bin_is_batched) { - arena_bin_flush_batch_impl(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, - dalloc_slabs, ndalloc_slabs, &dalloc_count, - &dalloc_slabs_extra); - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - } arena_decay_ticks( tsdn, cur_arena, flush_start - prev_flush_start); } @@ -941,18 +818,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); } - while (!edata_list_active_empty(&dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first(&dalloc_slabs_extra); - edata_list_active_remove(&dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } if (config_stats && !merged_stats) { /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. 
+ */ bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index cc6c3806..b8d10629 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -34,8 +34,6 @@ main(void) { P(arena_t); P(arena_stats_t); P(base_t); - P(bin_t); - P(bin_with_batch_t); P(decay_t); P(edata_t); P(ecache_t); diff --git a/test/include/test/fork.h b/test/include/test/fork.h deleted file mode 100644 index 9e04d279..00000000 --- a/test/include/test/fork.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef JEMALLOC_TEST_FORK_H -#define JEMALLOC_TEST_FORK_H - -#ifndef _WIN32 - -# include - -static inline void -fork_wait_for_child_exit(int pid) { - int status; - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure."); - } - if (WIFSIGNALED(status)) { - test_fail( - "Unexpected child termination due to " - "signal %d", - WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail("Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } -} - -#endif - -#endif /* JEMALLOC_TEST_FORK_H */ diff --git a/test/unit/batcher.c b/test/unit/batcher.c deleted file mode 100644 index 1052ca27..00000000 --- a/test/unit/batcher.c +++ /dev/null @@ -1,243 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/batcher.h" - -TEST_BEGIN(test_simple) { - enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; - batcher_t batcher; - size_t data[NELEMS_MAX]; - for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { - batcher_init(&batcher, nelems); - for (int run = 0; run < NRUNS; run++) { - for (int i = 0; i < NELEMS_MAX; i++) { - data[i] = (size_t)-1; - } - for (size_t i = 0; i < nelems; i++) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(i, idx, "Wrong index"); - assert_zu_eq((size_t)-1, data[idx], - "Expected uninitialized slot"); - 
data[idx] = DATA_BASE_VAL + i; - batcher_push_end(TSDN_NULL, &batcher); - } - if (nelems > 0) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(BATCHER_NO_IDX, idx, - "Shouldn't be able to push into a full " - "batcher"); - } - - size_t npop = batcher_pop_begin(TSDN_NULL, &batcher); - if (nelems == 0) { - assert_zu_eq(npop, BATCHER_NO_IDX, - "Shouldn't get any items out of an empty " - "batcher"); - } else { - assert_zu_eq(npop, nelems, - "Wrong number of elements popped"); - } - for (size_t i = 0; i < nelems; i++) { - assert_zu_eq(data[i], DATA_BASE_VAL + i, - "Item popped out of order!"); - } - if (nelems != 0) { - batcher_pop_end(TSDN_NULL, &batcher); - } - } - } -} -TEST_END - -TEST_BEGIN(test_multi_push) { - size_t idx, nelems; - batcher_t batcher; - batcher_init(&batcher, 11); - /* Push two at a time, 5 times, for 10 total. */ - for (int i = 0; i < 5; i++) { - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(2 * i, idx, "Should push in order"); - batcher_push_end(TSDN_NULL, &batcher); - } - /* Pushing two more should fail -- would put us at 12 elems. 
*/ - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space"); - /* But one more should work */ - idx = batcher_push_begin(TSDN_NULL, &batcher, 1); - assert_zu_eq(10, idx, "Should be out of space"); - batcher_push_end(TSDN_NULL, &batcher); - nelems = batcher_pop_begin(TSDN_NULL, &batcher); - batcher_pop_end(TSDN_NULL, &batcher); - assert_zu_eq(11, nelems, "Should have popped everything"); -} -TEST_END - -enum { - STRESS_TEST_ELEMS = 10, - STRESS_TEST_THREADS = 4, - STRESS_TEST_OPS = 1000 * 1000, - STRESS_TEST_PUSH_TO_POP_RATIO = 5, -}; - -typedef struct stress_test_data_s stress_test_data_t; -struct stress_test_data_s { - batcher_t batcher; - mtx_t pop_mtx; - atomic_u32_t thread_id; - - uint32_t elems_data[STRESS_TEST_ELEMS]; - size_t push_count[STRESS_TEST_ELEMS]; - size_t pop_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; -}; - -/* - * Note: 0-indexed. If one element is set and you want to find it, you call - * get_nth_set(elems, 0). - */ -static size_t -get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { - size_t ntrue = 0; - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - ntrue++; - } - if (ntrue > n) { - return i; - } - } - assert_not_reached( - "Asked for the %zu'th set element when < %zu are " - "set", - n, n); - /* Just to silence a compiler warning. 
*/ - return 0; -} - -static void * -stress_test_thd(void *arg) { - stress_test_data_t *data = arg; - size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED); - - size_t nelems_owned = 0; - bool elems_owned[STRESS_TEST_ELEMS] = {0}; - size_t local_push_count[STRESS_TEST_ELEMS] = {0}; - size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; - - for (int i = 0; i < STRESS_TEST_OPS; i++) { - size_t rnd = prng_range_zu( - &prng, STRESS_TEST_PUSH_TO_POP_RATIO); - if (rnd == 0 || nelems_owned == 0) { - size_t nelems = batcher_pop_begin( - TSDN_NULL, &data->batcher); - if (nelems == BATCHER_NO_IDX) { - continue; - } - for (size_t i = 0; i < nelems; i++) { - uint32_t elem = data->elems_data[i]; - assert_false(elems_owned[elem], - "Shouldn't already own what we just " - "popped"); - elems_owned[elem] = true; - nelems_owned++; - local_pop_count[elem]++; - data->pop_count[elem]++; - } - batcher_pop_end(TSDN_NULL, &data->batcher); - } else { - size_t elem_to_push_idx = prng_range_zu( - &prng, nelems_owned); - size_t elem = get_nth_set( - elems_owned, elem_to_push_idx); - assert_true(elems_owned[elem], - "Should own element we're about to pop"); - elems_owned[elem] = false; - local_push_count[elem]++; - data->push_count[elem]++; - nelems_owned--; - size_t idx = batcher_push_begin( - TSDN_NULL, &data->batcher, 1); - assert_zu_ne(idx, BATCHER_NO_IDX, - "Batcher can't be full -- we have one of its " - "elems!"); - data->elems_data[idx] = (uint32_t)elem; - batcher_push_end(TSDN_NULL, &data->batcher); - } - } - - /* Push all local elems back, flush local counts to the shared ones. 
*/ - size_t push_idx = 0; - if (nelems_owned != 0) { - push_idx = batcher_push_begin( - TSDN_NULL, &data->batcher, nelems_owned); - assert_zu_ne( - BATCHER_NO_IDX, push_idx, "Should be space to push"); - } - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - data->elems_data[push_idx] = (uint32_t)i; - push_idx++; - local_push_count[i]++; - data->push_count[i]++; - } - atomic_fetch_add_zu(&data->atomic_push_count[i], - local_push_count[i], ATOMIC_RELAXED); - atomic_fetch_add_zu(&data->atomic_pop_count[i], - local_pop_count[i], ATOMIC_RELAXED); - } - if (nelems_owned != 0) { - batcher_push_end(TSDN_NULL, &data->batcher); - } - - return NULL; -} - -TEST_BEGIN(test_stress) { - stress_test_data_t data; - batcher_init(&data.batcher, STRESS_TEST_ELEMS); - bool err = mtx_init(&data.pop_mtx); - assert_false(err, "mtx_init failure"); - atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED); - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - data.push_count[i] = 0; - data.pop_count[i] = 0; - atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED); - atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED); - - size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1); - assert_zu_eq(i, idx, "Should push in order"); - data.elems_data[idx] = i; - batcher_push_end(TSDN_NULL, &data.batcher); - } - - thd_t threads[STRESS_TEST_THREADS]; - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_create(&threads[i], stress_test_thd, &data); - } - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_join(threads[i], NULL); - } - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - assert_zu_ne( - 0, data.push_count[i], "Should have done something!"); - assert_zu_eq(data.push_count[i], data.pop_count[i], - "every element should be pushed and popped an equal number " - "of times"); - assert_zu_eq(data.push_count[i], - atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - 
"synchronization"); - assert_zu_eq(data.pop_count[i], - atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - "synchronization"); - } -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_simple, test_multi_push, test_stress); -} diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c deleted file mode 100644 index a422586d..00000000 --- a/test/unit/bin_batching.c +++ /dev/null @@ -1,270 +0,0 @@ -#include "test/jemalloc_test.h" -#include "test/fork.h" - -enum { - STRESS_THREADS = 3, - STRESS_OBJECTS_PER_THREAD = 1000, - STRESS_ALLOC_SZ = PAGE / 2, -}; - -typedef struct stress_thread_data_s stress_thread_data_t; -struct stress_thread_data_s { - unsigned thd_id; - atomic_zu_t *ready_thds; - atomic_zu_t *done_thds; - void **to_dalloc; -}; - -static atomic_zu_t push_failure_count; -static atomic_zu_t pop_attempt_results[2]; -static atomic_zu_t dalloc_zero_slab_count; -static atomic_zu_t dalloc_nonzero_slab_count; -static atomic_zu_t dalloc_nonempty_list_count; - -static bool -should_skip() { - return - /* - * We do batching operations on tcache flush pathways; we can't if - * caching is disabled. - */ - !opt_tcache || - /* We rely on tcache fill/flush operations of the size we use. */ - opt_tcache_max < STRESS_ALLOC_SZ - /* - * Some of the races we want to trigger are fiddly enough that they - * only show up under real concurrency. We add 1 to account for the - * main thread, which also does some work. - */ - || ncpus < STRESS_THREADS + 1; -} - -static void -increment_push_failure(size_t push_idx) { - if (push_idx == BATCHER_NO_IDX) { - atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); - } else { - assert_zu_lt(push_idx, 4, "Only 4 elems"); - volatile size_t x = 10000; - while (--x) { - /* Spin for a while, to try to provoke a failure. 
*/ - if (x == push_idx) { -#ifdef _WIN32 - SwitchToThread(); -#else - sched_yield(); -#endif - } - } - } -} - -static void -increment_pop_attempt(size_t elems_to_pop) { - bool elems = (elems_to_pop != BATCHER_NO_IDX); - atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED); -} - -static void -increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { - if (slab_dalloc_count > 0) { - atomic_fetch_add_zu( - &dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED); - } else { - atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED); - } - if (!list_empty) { - atomic_fetch_add_zu( - &dalloc_nonempty_list_count, 1, ATOMIC_RELAXED); - } -} - -static void -flush_tcache() { - assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - "Unexpected mallctl failure"); -} - -static void * -stress_thread(void *arg) { - stress_thread_data_t *data = arg; - uint64_t prng_state = data->thd_id; - atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); - while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - /* Spin */ - } - for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) { - dallocx(data->to_dalloc[i], 0); - if (prng_range_u64(&prng_state, 3) == 0) { - flush_tcache(); - } - } - flush_tcache(); - atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); - return NULL; -} - -/* - * Run main_thread_fn in conditions that trigger all the various edge cases and - * subtle race conditions. 
- */ -static void -stress_run(void (*main_thread_fn)(), int nruns) { - bin_batching_test_ndalloc_slabs_max = 1; - bin_batching_test_after_push_hook = &increment_push_failure; - bin_batching_test_mid_pop_hook = &increment_pop_attempt; - bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; - - atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); - - for (int run = 0; run < nruns; run++) { - thd_t thds[STRESS_THREADS]; - stress_thread_data_t thd_datas[STRESS_THREADS]; - atomic_zu_t ready_thds; - atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); - atomic_zu_t done_thds; - atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); - - void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD]; - for (int i = 0; i < STRESS_THREADS; i++) { - thd_datas[i].thd_id = i; - thd_datas[i].ready_thds = &ready_thds; - thd_datas[i].done_thds = &done_thds; - thd_datas[i].to_dalloc = ptrs[i]; - for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) { - void *ptr = mallocx(STRESS_ALLOC_SZ, 0); - assert_ptr_not_null(ptr, "alloc failure"); - ptrs[i][j] = ptr; - } - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_create(&thds[i], stress_thread, &thd_datas[i]); - } - while (atomic_load_zu(&done_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - main_thread_fn(); - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_join(thds[i], NULL); - } - } - - bin_batching_test_ndalloc_slabs_max = (unsigned)-1; - bin_batching_test_after_push_hook = NULL; - bin_batching_test_mid_pop_hook = NULL; - bin_batching_test_after_unlock_hook = NULL; -} - -static void -do_allocs_frees() { - enum { NALLOCS = 32 }; - flush_tcache(); - void *ptrs[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - 
ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0); - } - for (int i = 0; i < NALLOCS; i++) { - dallocx(ptrs[i], 0); - } - flush_tcache(); -} - -static void -test_arena_reset_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_arena_reset) { - int err; - unsigned arena; - unsigned old_arena; - - test_skip_if(should_skip()); - test_skip_if(opt_percpu_arena != percpu_arena_disabled); - - size_t arena_sz = sizeof(arena); - err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); - assert_d_eq(0, err, "Arena creation failed"); - - err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); - - stress_run(&test_arena_reset_main_fn, /* nruns */ 10); - - flush_tcache(); - - char buf[100]; - malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena); - err = mallctl(buf, NULL, NULL, NULL, 0); - assert_d_eq(0, err, "Couldn't change arena"); - - do_allocs_frees(); - - err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); -} -TEST_END - -static void -test_fork_main_fn() { -#ifndef _WIN32 - pid_t pid = fork(); - if (pid == -1) { - test_fail("Fork failure!"); - } else if (pid == 0) { - /* Child */ - do_allocs_frees(); - _exit(0); - } else { - fork_wait_for_child_exit(pid); - do_allocs_frees(); - } -#endif -} - -TEST_BEGIN(test_fork) { -#ifdef _WIN32 - test_skip("No fork on windows"); -#endif - test_skip_if(should_skip()); - stress_run(&test_fork_main_fn, /* nruns */ 10); -} -TEST_END - -static void -test_races_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_races) { - test_skip_if(should_skip()); - - stress_run(&test_races_main_fn, /* nruns */ 400); - - assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED), - "Should have seen some push failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED), - "Should have seen some pop failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED), - 
"Should have seen some pop successes"); - assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), - "Expected some frees that didn't empty a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED), - "expected some frees that emptied a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED), - "expected some frees that used the empty list"); -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_arena_reset, test_races, test_fork); -} diff --git a/test/unit/bin_batching.sh b/test/unit/bin_batching.sh deleted file mode 100644 index fef9bdc6..00000000 --- a/test/unit/bin_batching.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -# This value of max_batched_size effectively requires all bins to be batched; -# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be -# at multi-gigabyte minimum page sizes. -# The reason for this sort of hacky approach is that we want to -# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" -> -# "empty" and "non-empty"-> "full" transitions often, which have special -# handling). But the value of PAGE isn't easily available in test scripts. 
-export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4" diff --git a/test/unit/fork.c b/test/unit/fork.c index e52d0a6c..60675b77 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,5 +1,34 @@ #include "test/jemalloc_test.h" -#include "test/fork.h" + +#ifndef _WIN32 +# include +#endif + +#ifndef _WIN32 +static void +wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail( + "Unexpected child termination due to " + "signal %d", + WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} +#endif TEST_BEGIN(test_fork) { #ifndef _WIN32 @@ -37,7 +66,7 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } #else test_skip("fork(2) is irrelevant to Windows"); @@ -60,7 +89,7 @@ do_fork_thd(void *arg) { test_fail("Exec failed"); } else { /* Parent */ - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } return NULL; } @@ -97,7 +126,7 @@ TEST_BEGIN(test_fork_multithreaded) { do_test_fork_multithreaded(); _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } } #else From 48b4ad60a7ee897c813fb987183bb13d3596814c Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Tue, 5 Aug 2025 20:39:04 -0700 Subject: [PATCH 318/395] Remove an orphaned comment This was left behind when definitions of malloc_open and malloc_close were abstracted from code that had followed. 
--- src/pages.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index 78f3a1b7..1daab43b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -766,11 +766,6 @@ os_overcommits_sysctl(void) { #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY -/* - * Use syscall(2) rather than {open,read,close}(2) when possible to avoid - * reentry during bootstrapping if another library has interposed system call - * wrappers. - */ static bool os_overcommits_proc(void) { int fd; From 9fdc1160c5793d99f26192aee0406c653affb484 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Thu, 21 Aug 2025 20:44:18 -0700 Subject: [PATCH 319/395] Handle interruptions and retries of read(2) and write(2) --- include/jemalloc/internal/malloc_io.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 0f82f678..5e0805ed 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -96,6 +96,11 @@ malloc_write_fd(int fd, const void *buf, size_t count) { &((const byte_t *)buf)[bytes_written], count - bytes_written); if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif return result; } bytes_written += result; @@ -124,6 +129,11 @@ malloc_read_fd(int fd, void *buf, size_t count) { ssize_t result = malloc_read_fd_syscall( fd, &((byte_t *)buf)[bytes_read], count - bytes_read); if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif return result; } else if (result == 0) { break; From 38b12427b7a832fd97739d7cfcca4081a964df2e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Wed, 6 Aug 2025 21:32:16 -0700 Subject: [PATCH 320/395] Define malloc_{write,read}_fd as non-inline global functions The static inline definition made more sense when these functions just dispatched to a syscall wrapper. 
Since they acquired a retry loop, a non-inline definition makes more sense. --- include/jemalloc/internal/malloc_io.h | 86 ++------------------------- src/malloc_io.c | 75 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 5e0805ed..0f70c3c3 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -66,94 +66,18 @@ void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); -static inline ssize_t -malloc_write_fd_syscall(int fd, const void *buf, size_t count) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * result in the widest plausible type to avoid compiler warnings. 
- */ - long result = syscall(SYS_write, fd, buf, count); -#else - ssize_t result = (ssize_t)write(fd, buf, -# ifdef _WIN32 - (unsigned int) -# endif - count); -#endif - return (ssize_t)result; -} - -static inline ssize_t -malloc_write_fd(int fd, const void *buf, size_t count) { - size_t bytes_written = 0; - do { - ssize_t result = malloc_write_fd_syscall(fd, - &((const byte_t *)buf)[bytes_written], - count - bytes_written); - if (result < 0) { -#ifndef _WIN32 - if (errno == EINTR) { - continue; - } -#endif - return result; - } - bytes_written += result; - } while (bytes_written < count); - return bytes_written; -} - -static inline ssize_t -malloc_read_fd_syscall(int fd, void *buf, size_t count) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - long result = syscall(SYS_read, fd, buf, count); -#else - ssize_t result = read(fd, buf, -# ifdef _WIN32 - (unsigned int) -# endif - count); -#endif - return (ssize_t)result; -} - -static inline ssize_t -malloc_read_fd(int fd, void *buf, size_t count) { - size_t bytes_read = 0; - do { - ssize_t result = malloc_read_fd_syscall( - fd, &((byte_t *)buf)[bytes_read], count - bytes_read); - if (result < 0) { -#ifndef _WIN32 - if (errno == EINTR) { - continue; - } -#endif - return result; - } else if (result == 0) { - break; - } - bytes_read += result; - } while (bytes_read < count); - return bytes_read; -} +ssize_t malloc_write_fd(int fd, const void *buf, size_t count); +ssize_t malloc_read_fd(int fd, void *buf, size_t count); static inline int malloc_open(const char *path, int flags) { - int fd; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, path, flags); + return (int)syscall(SYS_open, path, flags); #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, AT_FDCWD, path, flags); + return (int)syscall(SYS_openat, AT_FDCWD, path, flags); #else - fd = open(path, flags); + return open(path, flags); #endif - return fd; } static inline int diff --git 
a/src/malloc_io.c b/src/malloc_io.c index 0c5d6c03..779cdc05 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -760,6 +760,81 @@ malloc_printf(const char *format, ...) { va_end(ap); } +static ssize_t +malloc_write_fd_syscall(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. + */ + return (ssize_t)syscall(SYS_write, fd, buf, count); +#else + return (ssize_t)write(fd, buf, +# ifdef _WIN32 + (unsigned int) +# endif + count); +#endif +} + +ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { + size_t bytes_written = 0; + do { + ssize_t result = malloc_write_fd_syscall(fd, + &((const byte_t *)buf)[bytes_written], + count - bytes_written); + if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif + return result; + } + bytes_written += result; + } while (bytes_written < count); + return bytes_written; +} + +static ssize_t +malloc_read_fd_syscall(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + return (ssize_t)syscall(SYS_read, fd, buf, count); +#else + return (ssize_t)read(fd, buf, +# ifdef _WIN32 + (unsigned int) +# endif + count); +#endif +} + +ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { + size_t bytes_read = 0; + do { + ssize_t result = malloc_read_fd_syscall( + fd, &((byte_t *)buf)[bytes_read], count - bytes_read); + if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif + return result; + } else if (result == 0) { + break; + } + bytes_read += result; + } while (bytes_read < count); + return bytes_read; +} + /* * Restore normal 
assertion macros, in order to make it possible to compile all * C files as a single concatenation. From 2a66c0be5a3727817ccf95c6150d10c19aae00f4 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Sat, 23 Aug 2025 08:53:28 -0700 Subject: [PATCH 321/395] [EASY][BUGFIX] Spelling and format --- include/jemalloc/internal/pac.h | 2 +- src/hpdata.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index a9edc19b..a19c8b35 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -131,7 +131,7 @@ struct pac_thp_s { * opt_thp controls THP for user requested allocations. Settings * "always", "never" and "default" are available if THP is supported * by the OS and the default extent hooks are used: - * - "always" and "never" are convered by pages_set_thp_state() in + * - "always" and "never" are covered by pages_set_thp_state() in * ehooks_default_alloc_impl(). * - "default" makes no change for all the other auto arenas except * the huge arena. For the huge arena, we might also look at diff --git a/src/hpdata.c b/src/hpdata.c index e9ee2738..e18e03cd 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -18,7 +18,8 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) - void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { +void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; From 9442300cc3adebdbf1d518dcba990a1c971e4f2e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 25 Aug 2025 19:39:30 -0700 Subject: [PATCH 322/395] Change the default page size to 64KiB on Aarch64 Linux This updates the configuration script to set the default page size to 64KiB on Aarch64 Linux. 
This is motivated by compatibility as a build configured for a 64KiB page will work on kernels that use the smaller 4KiB or 16KiB pages, whereas the reverse is not true. To make the configured page size setting more visible, the script now displays the page size when printing the configuration results. Users that want to override the page size to choose a smaller value can still do so with the --with-lg-pagesize configuration option. --- configure.ac | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/configure.ac b/configure.ac index ce5c8adc..dd0c3cc8 100644 --- a/configure.ac +++ b/configure.ac @@ -1990,6 +1990,11 @@ case "${host}" in LG_PAGE=14 fi ;; + aarch64-unknown-linux-*) + if test "x$LG_PAGE" = "xdetect"; then + LG_PAGE=16 + fi + ;; esac if test "x$LG_PAGE" = "xdetect"; then AC_CACHE_CHECK([LG_PAGE], @@ -3077,6 +3082,8 @@ AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) AC_MSG_RESULT([MANDIR : ${MANDIR}]) AC_MSG_RESULT([]) +AC_MSG_RESULT([LG_PAGE : ${LG_PAGE}]) +AC_MSG_RESULT([]) AC_MSG_RESULT([srcroot : ${srcroot}]) AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}]) AC_MSG_RESULT([objroot : ${objroot}]) From 5d5f76ee015696e0e086650e85722ceca9d191c1 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 26 Aug 2025 15:15:08 -0700 Subject: [PATCH 323/395] Remove pidfd_open call handling and rely on PIDFD_SELF --- configure.ac | 3 ++- include/jemalloc/internal/pages.h | 1 - src/jemalloc.c | 1 - src/pages.c | 41 +++++++------------------ 4 files changed, 11 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index dd0c3cc8..8ea092d6 100644 --- a/configure.ac +++ b/configure.ac @@ -2638,10 +2638,11 @@ if test "x${je_cv_madvise}" = "xyes" ; then dnl Check for process_madvise JE_COMPILABLE([process_madvise(2)], [ +#include #include #include ], [ - syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); + syscall(SYS_process_madvise, PIDFD_SELF, (void *)0, 0, 0, 0); ], [je_cv_process_madvise]) if test
"x${je_cv_process_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index b0cc5bba..31909934 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -127,6 +127,5 @@ bool pages_boot(void); void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); -void pages_postfork_child(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f59a781..0fe69a1e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4535,7 +4535,6 @@ jemalloc_postfork_child(void) { malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); - pages_postfork_child(); } /******************************************************************************/ diff --git a/src/pages.c b/src/pages.c index 1daab43b..44c57b28 100644 --- a/src/pages.c +++ b/src/pages.c @@ -621,7 +621,11 @@ pages_dodump(void *addr, size_t size) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE # include # include -static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); + +#ifndef PIDFD_SELF +#define PIDFD_SELF -10000 +#endif + static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); static bool @@ -650,33 +654,17 @@ pages_purge_process_madvise_impl( if (!atomic_load_b(&process_madvise_gate, ATOMIC_RELAXED)) { return true; } - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); - while (pid_fd == -1) { - int newfd = (int) syscall(SYS_pidfd_open, getpid(), 0); - if (newfd == -1) { - return true; - } - if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, - &pid_fd, newfd, - ATOMIC_RELAXED, - ATOMIC_RELAXED)) { - /* Someone else set the fd, so we close ours */ - assert(pid_fd != -1); - close(newfd); - } else { - pid_fd = newfd; - } - } /* * TODO: remove this 
save/restore of errno after supporting errno * preservation for free() call properly. */ int saved_errno = get_errno(); - size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, - (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, + PIDFD_SELF, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); if (purged_bytes == (size_t) -1) { - if (errno == EPERM || errno == EINVAL || errno == ENOSYS) { + if (errno == EPERM || errno == EINVAL || errno == ENOSYS + || errno == EBADF) { /* Process madvise not supported the way we need it. */ atomic_store_b(&process_madvise_gate, false, ATOMIC_RELAXED); @@ -687,15 +675,6 @@ pages_purge_process_madvise_impl( return purged_bytes != total_bytes; } -void pages_postfork_child(void) { - /* Reset the file descriptor we inherited from parent process */ - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); - if (pid_fd != -1) { - atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_RELAXED); - close(pid_fd); - } -} - #else static bool @@ -710,8 +689,6 @@ pages_purge_process_madvise_impl( return true; } -void pages_postfork_child(void) {} - #endif bool From 5a634a8d0a1d853fc9905bc7b8908895f147322a Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Wed, 27 Aug 2025 16:48:40 -0700 Subject: [PATCH 324/395] Always use pthread_equal to compare thread IDs This change replaces direct comparisons of Pthread thread IDs with calls to pthread_equal. Directly comparing thread IDs is neither portable nor reliable since a thread ID is defined as an opaque type that can be implemented using a structure. --- src/jemalloc.c | 3 ++- src/tsd.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0fe69a1e..5c77621c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -220,7 +220,8 @@ static uint8_t malloc_slow_flags; /* Used to let the initializing thread recursively allocate. 
*/ # define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() -# define IS_INITIALIZER (malloc_initializer == pthread_self()) +# define IS_INITIALIZER \ + (pthread_equal(malloc_initializer, pthread_self())) static pthread_t malloc_initializer = NO_INITIALIZER; #else # define NO_INITIALIZER false diff --git a/src/tsd.c b/src/tsd.c index 20042c2d..30acad93 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -528,7 +528,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { /* Check whether this thread has already inserted into the list. */ malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach (iter, &head->blocks, link) { - if (iter->thread == self) { + if (pthread_equal(iter->thread, self)) { malloc_mutex_unlock(TSDN_NULL, &head->lock); return iter->data; } From c51949ea3ee75c8e417b59b89334f225775d4e64 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Tue, 26 Aug 2025 13:32:57 -0700 Subject: [PATCH 325/395] Update config.guess and config.sub to the latest versions These files need to be refreshed periodically to support new platform types. The following command was used to retrieve the updates curl -L -O https://git.savannah.gnu.org/cgit/config.git/plain/config.guess curl -L -O https://git.savannah.gnu.org/cgit/config.git/plain/config.sub Closes: #2814 --- build-aux/config.guess | 1345 ++++++++++++++++++++++------------------ build-aux/config.sub | 1015 ++++++++++++++++++++++-------- 2 files changed, 1493 insertions(+), 867 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index f7727026..a9d01fde 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,12 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2025 Free Software Foundation, Inc. 
-timestamp='2021-01-01' +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2025-07-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -32,12 +34,20 @@ timestamp='2021-01-01' # Please send patches to . -me=$(echo "$0" | sed -e 's,.*/,,') +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + + +me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] -Output the configuration name of the system \`$me' is run on. +Output the configuration name of the system '$me' is run on. Options: -h, --help print this help, then exit @@ -50,13 +60,13 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2025 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." # Parse command line while test $# -gt 0 ; do @@ -84,13 +94,16 @@ if test $# != 0; then exit 1 fi +# Just in case it came from the environment. +GUESS= + # CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. +# Historically, 'CC_FOR_BUILD' used to be named 'HOST_CC'. We still +# use 'HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. @@ -102,17 +115,17 @@ set_cc_for_build() { # prevent multiple calls if $tmp is already set test "$tmp" && return 0 : "${TMPDIR=/tmp}" - # shellcheck disable=SC2039 - { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || + # shellcheck disable=SC2039,SC3028 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } dummy=$tmp/dummy case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in ,,) echo "int x;" > "$dummy.c" - for driver in cc gcc c89 c99 ; do + for driver in cc gcc c17 c99 c89 ; do if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then - CC_FOR_BUILD="$driver" + CC_FOR_BUILD=$driver break fi done @@ -131,17 +144,20 @@ if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi -UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown -UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown -UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown -UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || 
UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown -case "$UNAME_SYSTEM" in +case $UNAME_SYSTEM in Linux|GNU|GNU/*) LIBC=unknown set_cc_for_build cat <<-EOF > "$dummy.c" + #if defined(__ANDROID__) + LIBC=android + #else #include #if defined(__UCLIBC__) LIBC=uclibc @@ -149,6 +165,8 @@ Linux|GNU|GNU/*) LIBC=dietlibc #elif defined(__GLIBC__) LIBC=gnu + #elif defined(__LLVM_LIBC__) + LIBC=llvm #else #include /* First heuristic to detect musl libc. */ @@ -156,8 +174,10 @@ Linux|GNU|GNU/*) LIBC=musl #endif #endif + #endif EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" + cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$cc_set_libc" # Second heuristic to detect musl libc. if [ "$LIBC" = unknown ] && @@ -176,7 +196,7 @@ esac # Note: order is significant - the case branches are not exclusive. -case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in +case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -188,12 +208,11 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". 
- sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ - "/sbin/$sysctl" 2>/dev/null || \ - "/usr/sbin/$sysctl" 2>/dev/null || \ - echo unknown)) - case "$UNAME_MACHINE_ARCH" in + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)` + case $UNAME_MACHINE_ARCH in aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; @@ -201,15 +220,15 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; earmv*) - arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') - endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') - machine="${arch}${endian}"-unknown + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown ;; - *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + *) machine=$UNAME_MACHINE_ARCH-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently (or will in the future) and ABI. - case "$UNAME_MACHINE_ARCH" in + case $UNAME_MACHINE_ARCH in earm*) os=netbsdelf ;; @@ -230,10 +249,10 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in ;; esac # Determine ABI tags. - case "$UNAME_MACHINE_ARCH" in + case $UNAME_MACHINE_ARCH in earm*) expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` ;; esac # The OS release @@ -241,76 +260,82 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. 
- case "$UNAME_VERSION" in + case $UNAME_VERSION in Debian*) release='-gnu' ;; *) - release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "$machine-${os}${release}${abi-}" - exit ;; + GUESS=$machine-${os}${release}${abi-} + ;; *:Bitrig:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') - echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE + ;; *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') - echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE + ;; + *:SecBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE + ;; *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') - echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE + ;; *:MidnightBSD:*:*) - echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE + ;; *:ekkoBSD:*:*) - echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE + ;; *:SolidBSD:*:*) - echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE + ;; *:OS108:*:*) - echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE + ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd"$UNAME_RELEASE" - 
exit ;; + GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE + ;; *:MirBSD:*:*) - echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE + ;; *:Sortix:*:*) - echo "$UNAME_MACHINE"-unknown-sortix - exit ;; + GUESS=$UNAME_MACHINE-unknown-sortix + ;; *:Twizzler:*:*) - echo "$UNAME_MACHINE"-unknown-twizzler - exit ;; + GUESS=$UNAME_MACHINE-unknown-twizzler + ;; *:Redox:*:*) - echo "$UNAME_MACHINE"-unknown-redox - exit ;; + GUESS=$UNAME_MACHINE-unknown-redox + ;; mips:OSF1:*.*) - echo mips-dec-osf1 - exit ;; + GUESS=mips-dec-osf1 + ;; alpha:OSF1:*:*) + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + trap '' 0 case $UNAME_RELEASE in *4.0) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) - case "$ALPHA_CPU_TYPE" in + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case $ALPHA_CPU_TYPE in "EV4 (21064)") UNAME_MACHINE=alpha ;; "EV4.5 (21064)") @@ -347,68 +372,69 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" - # Reset EXIT trap before exiting to avoid spurious non-zero exit code. - exitcode=$? 
- trap '' 0 - exit $exitcode ;; + OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + GUESS=$UNAME_MACHINE-dec-osf$OSF_REL + ;; Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; + GUESS=m68k-unknown-sysv4 + ;; *:[Aa]miga[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-amigaos - exit ;; + GUESS=$UNAME_MACHINE-unknown-amigaos + ;; *:[Mm]orph[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-morphos - exit ;; + GUESS=$UNAME_MACHINE-unknown-morphos + ;; *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; + GUESS=i370-ibm-openedition + ;; *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; + GUESS=s390-ibm-zvmoe + ;; *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; + GUESS=powerpc-ibm-os400 + ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix"$UNAME_RELEASE" - exit ;; + GUESS=arm-acorn-riscix$UNAME_RELEASE + ;; arm*:riscos:*:*|arm*:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; + GUESS=arm-unknown-riscos + ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; + GUESS=hppa1.1-hitachi-hiuxmpp + ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
- if test "$( (/bin/universe) 2>/dev/null)" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; + case `(/bin/universe) 2>/dev/null` in + att) GUESS=pyramid-pyramid-sysv3 ;; + *) GUESS=pyramid-pyramid-bsd ;; + esac + ;; NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; + GUESS=pyramid-pyramid-svr4 + ;; DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; + GUESS=sparc-icl-nx6 + ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case $(/usr/bin/uname -p) in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; + case `/usr/bin/uname -p` in + sparc) GUESS=sparc-icl-nx7 ;; + esac + ;; s390x:SunOS:*:*) - echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL + ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-hal-solaris2$SUN_REL + ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris2$SUN_REL + ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux"$UNAME_RELEASE" - exit ;; + GUESS=i386-pc-auroraux$UNAME_RELEASE + ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) set_cc_for_build SUN_ARCH=i386 @@ -417,47 +443,50 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # This test works for both compilers. 
if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 fi fi - echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$SUN_ARCH-pc-solaris2$SUN_REL + ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris3$SUN_REL + ;; sun4*:SunOS:*:*) - case "$(/usr/bin/arch -k)" in + case `/usr/bin/arch -k` in Series*|S4*) - UNAME_RELEASE=$(uname -v) + UNAME_RELEASE=`uname -v` ;; esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" - exit ;; + # Japanese Language versions have a version number like '4.1.3-JL'. 
+ SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` + GUESS=sparc-sun-sunos$SUN_REL + ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos"$UNAME_RELEASE" - exit ;; + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 - case "$(/bin/arch)" in + case `/bin/arch` in sun3) - echo m68k-sun-sunos"$UNAME_RELEASE" + GUESS=m68k-sun-sunos$UNAME_RELEASE ;; sun4) - echo sparc-sun-sunos"$UNAME_RELEASE" + GUESS=sparc-sun-sunos$UNAME_RELEASE ;; esac - exit ;; + ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos"$UNAME_RELEASE" - exit ;; + GUESS=sparc-auspex-sunos$UNAME_RELEASE + ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor @@ -467,41 +496,41 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-milan-mint$UNAME_RELEASE + ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-hades-mint$UNAME_RELEASE + ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-unknown-mint$UNAME_RELEASE + ;; m68k:machten:*:*) - echo m68k-apple-machten"$UNAME_RELEASE" - exit ;; + GUESS=m68k-apple-machten$UNAME_RELEASE + ;; powerpc:machten:*:*) - echo powerpc-apple-machten"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-apple-machten$UNAME_RELEASE + ;; RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; + GUESS=mips-dec-mach_bsd4.3 + ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix"$UNAME_RELEASE" - exit ;; + GUESS=mips-dec-ultrix$UNAME_RELEASE + ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix"$UNAME_RELEASE" - exit ;; + GUESS=vax-dec-ultrix$UNAME_RELEASE + ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix"$UNAME_RELEASE" - exit ;; + GUESS=clipper-intergraph-clix$UNAME_RELEASE + ;; mips:*:*:UMIPS | mips:*:*:RISCos) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" @@ -526,85 +555,87 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in } EOF $CC_FOR_BUILD -o "$dummy" "$dummy.c" && - dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && - SYSTEM_NAME=$("$dummy" "$dummyarg") && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + 
SYSTEM_NAME=`"$dummy" "$dummyarg"` && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos"$UNAME_RELEASE" - exit ;; + GUESS=mips-mips-riscos$UNAME_RELEASE + ;; Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit ;; + GUESS=powerpc-motorola-powermax + ;; Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; + GUESS=powerpc-harris-powermax + ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; + GUESS=powerpc-harris-powermax + ;; Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; + GUESS=powerpc-harris-powerunix + ;; m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; + GUESS=m88k-harris-cxux7 + ;; m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; + GUESS=m88k-motorola-sysv4 + ;; m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; + GUESS=m88k-motorola-sysv3 + ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=$(/usr/bin/uname -p) + UNAME_PROCESSOR=`/usr/bin/uname -p` if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ test "$TARGET_BINARY_INTERFACE"x = x then - echo m88k-dg-dgux"$UNAME_RELEASE" + GUESS=m88k-dg-dgux$UNAME_RELEASE else - echo m88k-dg-dguxbcs"$UNAME_RELEASE" + GUESS=m88k-dg-dguxbcs$UNAME_RELEASE fi else - echo i586-dg-dgux"$UNAME_RELEASE" + GUESS=i586-dg-dgux$UNAME_RELEASE fi - exit ;; + ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; + GUESS=m88k-dolphin-sysv3 + ;; M88*:*:R3*:*) # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; + GUESS=m88k-motorola-sysv3 + ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; + GUESS=m88k-tektronix-sysv3 + ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; + GUESS=m68k-tektronix-bsd + ;; *:IRIX*:*:*) - echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" - 
exit ;; + IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` + GUESS=mips-sgi-irix$IRIX_REL + ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' + GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id + ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; + GUESS=i386-ibm-aix + ;; ia64:AIX:*:*) if test -x /usr/bin/oslevel ; then - IBM_REV=$(/usr/bin/oslevel) + IBM_REV=`/usr/bin/oslevel` else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi - echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" - exit ;; + GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV + ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #include - main() + int + main () { if (!__power_pc()) exit(1); @@ -612,68 +643,68 @@ EOF exit(0); } EOF - if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` then - echo "$SYSTEM_NAME" + GUESS=$SYSTEM_NAME else - echo rs6000-ibm-aix3.2.5 + GUESS=rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 + GUESS=rs6000-ibm-aix3.2.4 else - echo rs6000-ibm-aix3.2 + GUESS=rs6000-ibm-aix3.2 fi - exit ;; + ;; *:AIX:*:[4567]) - IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if test -x /usr/bin/lslpp ; then - IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + 
IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi - echo "$IBM_ARCH"-ibm-aix"$IBM_REV" - exit ;; + GUESS=$IBM_ARCH-ibm-aix$IBM_REV + ;; *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; + GUESS=rs6000-ibm-aix + ;; ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) - echo romp-ibm-bsd4.4 - exit ;; + GUESS=romp-ibm-bsd4.4 + ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 + GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to + ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; + GUESS=rs6000-bull-bosx + ;; DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; + GUESS=m68k-bull-sysv3 + ;; 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; + GUESS=m68k-hp-bsd + ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; + GUESS=m68k-hp-bsd4.4 + ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - case "$UNAME_MACHINE" in + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + case $UNAME_MACHINE in 9000/31?) HP_ARCH=m68000 ;; 9000/[34]??) 
HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if test -x /usr/bin/getconf; then - sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) - sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) - case "$sc_cpu_version" in + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case $sc_cpu_version in 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "$sc_kernel_bits" in + case $sc_kernel_bits in 32) HP_ARCH=hppa2.0n ;; 64) HP_ARCH=hppa2.0w ;; '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 @@ -688,7 +719,8 @@ EOF #include #include - int main () + int + main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); @@ -715,7 +747,7 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac @@ -740,12 +772,12 @@ EOF HP_ARCH=hppa64 fi fi - echo "$HP_ARCH"-hp-hpux"$HPUX_REV" - exit ;; + GUESS=$HP_ARCH-hp-hpux$HPUX_REV + ;; ia64:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - echo ia64-hp-hpux"$HPUX_REV" - exit ;; + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + GUESS=ia64-hp-hpux$HPUX_REV + ;; 3050*:HI-UX:*:*) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" @@ -773,38 +805,38 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; + GUESS=unknown-hitachi-hiuxwe2 + ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) - echo hppa1.1-hp-bsd - exit ;; + GUESS=hppa1.1-hp-bsd + ;; 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; + GUESS=hppa1.0-hp-bsd + ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; + 
GUESS=hppa1.0-hp-mpeix + ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) - echo hppa1.1-hp-osf - exit ;; + GUESS=hppa1.1-hp-osf + ;; hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; + GUESS=hppa1.0-hp-osf + ;; i*86:OSF1:*:*) if test -x /usr/sbin/sysversion ; then - echo "$UNAME_MACHINE"-unknown-osf1mk + GUESS=$UNAME_MACHINE-unknown-osf1mk else - echo "$UNAME_MACHINE"-unknown-osf1 + GUESS=$UNAME_MACHINE-unknown-osf1 fi - exit ;; + ;; parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; + GUESS=hppa1.1-hp-lites + ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; + GUESS=c1-convex-bsd + ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd @@ -812,17 +844,18 @@ EOF fi exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; + GUESS=c34-convex-bsd + ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; + GUESS=c38-convex-bsd + ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; + GUESS=c4-convex-bsd + ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=ymp-cray-unicos$CRAY_REL + ;; CRAY*[A-Z]90:*:*:*) echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ @@ -830,114 +863,155 @@ EOF -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=t90-cray-unicos$CRAY_REL + ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=alphaev5-cray-unicosmk$CRAY_REL + ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=sv1-cray-unicos$CRAY_REL + ;; 
*:UNICOS/mp:*:*) - echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=craynv-cray-unicosmp$CRAY_REL + ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE + ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=sparc-unknown-bsdi$UNAME_RELEASE + ;; *:BSD/OS:*:*) - echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE + ;; arm:FreeBSD:*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` set_cc_for_build if echo __ARM_PCS_VFP | 
$CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi else - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf fi - exit ;; + ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=$(/usr/bin/uname -p) - case "$UNAME_PROCESSOR" in + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in amd64) UNAME_PROCESSOR=x86_64 ;; i386) UNAME_PROCESSOR=i586 ;; esac - echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL + ;; i*:CYGWIN*:*) - echo "$UNAME_MACHINE"-pc-cygwin - exit ;; + GUESS=$UNAME_MACHINE-pc-cygwin + ;; *:MINGW64*:*) - echo "$UNAME_MACHINE"-pc-mingw64 - exit ;; + GUESS=$UNAME_MACHINE-pc-mingw64 + ;; *:MINGW*:*) - echo "$UNAME_MACHINE"-pc-mingw32 - exit ;; + GUESS=$UNAME_MACHINE-pc-mingw32 + ;; *:MSYS*:*) - echo "$UNAME_MACHINE"-pc-msys - exit ;; + GUESS=$UNAME_MACHINE-pc-msys + ;; i*:PW*:*) - echo "$UNAME_MACHINE"-pc-pw32 - exit ;; + GUESS=$UNAME_MACHINE-pc-pw32 + ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; *:Interix*:*) - case "$UNAME_MACHINE" in + case $UNAME_MACHINE in x86) - echo i586-pc-interix"$UNAME_RELEASE" - exit ;; + GUESS=i586-pc-interix$UNAME_RELEASE + ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix"$UNAME_RELEASE" - exit ;; + GUESS=x86_64-unknown-interix$UNAME_RELEASE + ;; IA64) - echo ia64-unknown-interix"$UNAME_RELEASE" - exit ;; + GUESS=ia64-unknown-interix$UNAME_RELEASE + ;; esac ;; i*:UWIN*:*) - echo "$UNAME_MACHINE"-pc-uwin - exit ;; + GUESS=$UNAME_MACHINE-pc-uwin + ;; 
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-pc-cygwin - exit ;; + GUESS=x86_64-pc-cygwin + ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=powerpcle-unknown-solaris2$SUN_REL + ;; *:GNU:*:*) # the GNU system - echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" - exit ;; + GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` + GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL + ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" - exit ;; + GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC + ;; + x86_64:[Mm]anagarm:*:*|i?86:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-pc-managarm-mlibc" + ;; + *:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-unknown-managarm-mlibc" + ;; *:Minix:*:*) - echo "$UNAME_MACHINE"-unknown-minix - exit ;; + GUESS=$UNAME_MACHINE-unknown-minix + ;; aarch64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + set_cc_for_build + CPU=$UNAME_MACHINE + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __ARM_EABI__ + #ifdef __ARM_PCS_VFP + ABI=eabihf + #else + ABI=eabi + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + eabi | eabihf) CPU=armv8l; LIBCABI=$LIBC$ABI ;; + esac + fi + GUESS=$CPU-unknown-linux-$LIBCABI + ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + 
GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; @@ -948,63 +1022,72 @@ EOF esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - arc:Linux:*:* | arceb:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; arm*:Linux:*:*) set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi else - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf fi fi - exit ;; + ;; avr32*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; cris:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; crisv32:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; e2k:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; frv:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; hexagon:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; 
i*86:Linux:*:*) - echo "$UNAME_MACHINE"-pc-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-pc-linux-$LIBC + ;; ia64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; k1om:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + kvx:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + kvx:cos:*:*) + GUESS=$UNAME_MACHINE-unknown-cos + ;; + kvx:mbr:*:*) + GUESS=$UNAME_MACHINE-unknown-mbr + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; m32r*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; m68*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; mips:Linux:*:* | mips64:Linux:*:*) set_cc_for_build IS_GLIBC=0 @@ -1049,138 +1132,150 @@ EOF #endif #endif EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" + cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` + eval "$cc_set_vars" test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; mips64el:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; openrisc*:Linux:*:*) - echo or1k-unknown-linux-"$LIBC" - exit ;; + GUESS=or1k-unknown-linux-$LIBC + ;; or32:Linux:*:* | or1k*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; padre:Linux:*:*) - echo sparc-unknown-linux-"$LIBC" - exit ;; + GUESS=sparc-unknown-linux-$LIBC + ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-"$LIBC" - exit ;; + GUESS=hppa64-unknown-linux-$LIBC + ;; 
parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level - case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in - PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; - PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; - *) echo hppa-unknown-linux-"$LIBC" ;; + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; + PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; + *) GUESS=hppa-unknown-linux-$LIBC ;; esac - exit ;; + ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc64-unknown-linux-$LIBC + ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc-unknown-linux-$LIBC + ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc64le-unknown-linux-$LIBC + ;; ppcle:Linux:*:*) - echo powerpcle-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpcle-unknown-linux-$LIBC + ;; riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; s390:Linux:*:* | s390x:Linux:*:*) - echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-ibm-linux-$LIBC + ;; sh64*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; sh*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; tile*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; vax:Linux:*:*) - echo "$UNAME_MACHINE"-dec-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-dec-linux-$LIBC + ;; x86_64:Linux:*:*) set_cc_for_build + CPU=$UNAME_MACHINE LIBCABI=$LIBC if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef 
__ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI="$LIBC"x32 - fi + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __i386__ + ABI=x86 + #else + #ifdef __ILP32__ + ABI=x32 + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + x86) CPU=i686 ;; + x32) LIBCABI=${LIBC}x32 ;; + esac fi - echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" - exit ;; + GUESS=$CPU-pc-linux-$LIBCABI + ;; xtensa*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; + GUESS=i386-sequent-sysv4 + ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. - echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" - exit ;; + GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION + ;; i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility + # If we were able to find 'uname', then EMX Unix compatibility # is probably installed. 
- echo "$UNAME_MACHINE"-pc-os2-emx - exit ;; + GUESS=$UNAME_MACHINE-pc-os2-emx + ;; i*86:XTS-300:*:STOP) - echo "$UNAME_MACHINE"-unknown-stop - exit ;; + GUESS=$UNAME_MACHINE-unknown-stop + ;; i*86:atheos:*:*) - echo "$UNAME_MACHINE"-unknown-atheos - exit ;; + GUESS=$UNAME_MACHINE-unknown-atheos + ;; i*86:syllable:*:*) - echo "$UNAME_MACHINE"-pc-syllable - exit ;; + GUESS=$UNAME_MACHINE-pc-syllable + ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=i386-unknown-lynxos$UNAME_RELEASE + ;; i*86:*DOS:*:*) - echo "$UNAME_MACHINE"-pc-msdosdjgpp - exit ;; + GUESS=$UNAME_MACHINE-pc-msdosdjgpp + ;; i*86:*:4.*:*) - UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL else - echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL fi - exit ;; + ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. 
- case $(/bin/uname -X | grep "^Machine") in + case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" - exit ;; + GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then - UNAME_REL=$(sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 @@ -1188,11 +1283,11 @@ EOF && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL else - echo "$UNAME_MACHINE"-pc-sysv32 + GUESS=$UNAME_MACHINE-pc-sysv32 fi - exit ;; + ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about @@ -1200,37 +1295,37 @@ EOF # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. - echo i586-pc-msdosdjgpp - exit ;; + GUESS=i586-pc-msdosdjgpp + ;; Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; + GUESS=i386-pc-mach3 + ;; paragon:*:*:*) - echo i860-intel-osf1 - exit ;; + GUESS=i860-intel-osf1 + ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 fi - exit ;; + ;; mini*:CTIX:SYS*5:*) # "miniframe" - echo m68010-convergent-sysv - exit ;; + GUESS=m68010-convergent-sysv + ;; mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; + GUESS=m68k-convergent-sysv + ;; M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; + GUESS=m68k-diab-dnix + ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ @@ -1241,7 +1336,7 @@ EOF NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ @@ -1249,118 +1344,121 @@ EOF /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=m68k-unknown-lynxos$UNAME_RELEASE + ;; mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; + GUESS=m68k-atari-sysv4 + ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=sparc-unknown-lynxos$UNAME_RELEASE + ;; rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos"$UNAME_RELEASE" - exit ;; + 
GUESS=rs6000-unknown-lynxos$UNAME_RELEASE + ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-unknown-lynxos$UNAME_RELEASE + ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv"$UNAME_RELEASE" - exit ;; + GUESS=mips-dde-sysv$UNAME_RELEASE + ;; RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; + GUESS=mips-sni-sysv4 + ;; RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; + GUESS=mips-sni-sysv4 + ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - echo "$UNAME_MACHINE"-sni-sysv4 + UNAME_MACHINE=`(uname -p) 2>/dev/null` + GUESS=$UNAME_MACHINE-sni-sysv4 else - echo ns32k-sni-sysv + GUESS=ns32k-sni-sysv fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + ;; + PENTIUM:*:4.0*:*) # Unisys 'ClearPath HMP IX 4000' SVR4/MP effort # says - echo i586-unisys-sysv4 - exit ;; + GUESS=i586-unisys-sysv4 + ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; + GUESS=hppa1.1-stratus-sysv4 + ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; + GUESS=i860-stratus-sysv4 + ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo "$UNAME_MACHINE"-stratus-vos - exit ;; + GUESS=$UNAME_MACHINE-stratus-vos + ;; *:VOS:*:*) # From Paul.Green@stratus.com. 
- echo hppa1.1-stratus-vos - exit ;; + GUESS=hppa1.1-stratus-vos + ;; mc68*:A/UX:*:*) - echo m68k-apple-aux"$UNAME_RELEASE" - exit ;; + GUESS=m68k-apple-aux$UNAME_RELEASE + ;; news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; + GUESS=mips-sony-newsos6 + ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if test -d /usr/nec; then - echo mips-nec-sysv"$UNAME_RELEASE" + GUESS=mips-nec-sysv$UNAME_RELEASE else - echo mips-unknown-sysv"$UNAME_RELEASE" + GUESS=mips-unknown-sysv$UNAME_RELEASE fi - exit ;; + ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; + GUESS=powerpc-be-beos + ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; + GUESS=powerpc-apple-beos + ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit ;; + GUESS=i586-pc-beos + ;; BePC:Haiku:*:*) # Haiku running on Intel PC compatible. - echo i586-pc-haiku - exit ;; - x86_64:Haiku:*:*) - echo x86_64-unknown-haiku - exit ;; + GUESS=i586-pc-haiku + ;; + ppc:Haiku:*:*) # Haiku running on Apple PowerPC + GUESS=powerpc-apple-haiku + ;; + *:Haiku:*:*) # Haiku modern gcc (not bound by BeOS compat) + GUESS=$UNAME_MACHINE-unknown-haiku + ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx4-nec-superux$UNAME_RELEASE + ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx5-nec-superux$UNAME_RELEASE + ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx6-nec-superux$UNAME_RELEASE + ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx7-nec-superux$UNAME_RELEASE + ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx8-nec-superux$UNAME_RELEASE + ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx8r-nec-superux$UNAME_RELEASE + ;; SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux"$UNAME_RELEASE" - exit ;; + 
GUESS=sxace-nec-superux$UNAME_RELEASE + ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-apple-rhapsody$UNAME_RELEASE + ;; *:Rhapsody:*:*) - echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE + ;; arm64:Darwin:*:*) - echo aarch64-apple-darwin"$UNAME_RELEASE" - exit ;; + GUESS=aarch64-apple-darwin$UNAME_RELEASE + ;; *:Darwin:*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` case $UNAME_PROCESSOR in unknown) UNAME_PROCESSOR=powerpc ;; esac @@ -1394,109 +1492,125 @@ EOF # uname -m returns i386 or x86_64 UNAME_PROCESSOR=$UNAME_MACHINE fi - echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE + ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE + ;; *:QNX:*:4*) - echo i386-pc-qnx - exit ;; + GUESS=i386-pc-qnx + ;; NEO-*:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=neo-tandem-nsk$UNAME_RELEASE + ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nse-tandem-nsk$UNAME_RELEASE + ;; NSR-*:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsr-tandem-nsk$UNAME_RELEASE + ;; NSV-*:NONSTOP_KERNEL:*:*) - echo nsv-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsv-tandem-nsk$UNAME_RELEASE + ;; NSX-*:NONSTOP_KERNEL:*:*) - echo nsx-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsx-tandem-nsk$UNAME_RELEASE + ;; *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; + GUESS=mips-compaq-nonstopux + ;; BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; + GUESS=bs2000-siemens-sysv + ;; DS/*:UNIX_System_V:*:*) - echo 
"$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE + ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - # shellcheck disable=SC2154 - if test "$cputype" = 386; then + if test "${cputype-}" = 386; then UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" + elif test "x${cputype-}" != x; then + UNAME_MACHINE=$cputype fi - echo "$UNAME_MACHINE"-unknown-plan9 - exit ;; + GUESS=$UNAME_MACHINE-unknown-plan9 + ;; *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; + GUESS=pdp10-unknown-tops10 + ;; *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; + GUESS=pdp10-unknown-tenex + ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; + GUESS=pdp10-dec-tops20 + ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; + GUESS=pdp10-xkl-tops20 + ;; *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; + GUESS=pdp10-unknown-tops20 + ;; *:ITS:*:*) - echo pdp10-unknown-its - exit ;; + GUESS=pdp10-unknown-its + ;; SEI:*:*:SEIUX) - echo mips-sei-seiux"$UNAME_RELEASE" - exit ;; + GUESS=mips-sei-seiux$UNAME_RELEASE + ;; *:DragonFly:*:*) - echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; + DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL + ;; *:*VMS:*:*) - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - case "$UNAME_MACHINE" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case $UNAME_MACHINE in + A*) GUESS=alpha-dec-vms ;; + I*) GUESS=ia64-dec-vms ;; + V*) GUESS=vax-dec-vms ;; esac ;; *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; + GUESS=i386-pc-xenix + ;; i*86:skyos:*:*) - echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" - exit ;; + 
SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` + GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL + ;; i*86:rdos:*:*) - echo "$UNAME_MACHINE"-pc-rdos - exit ;; - i*86:AROS:*:*) - echo "$UNAME_MACHINE"-pc-aros - exit ;; + GUESS=$UNAME_MACHINE-pc-rdos + ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; + *:AROS:*:*) + GUESS=$UNAME_MACHINE-unknown-aros + ;; x86_64:VMkernel:*:*) - echo "$UNAME_MACHINE"-unknown-esx - exit ;; + GUESS=$UNAME_MACHINE-unknown-esx + ;; amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; + GUESS=x86_64-unknown-onefs + ;; *:Unleashed:*:*) - echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE + ;; + x86_64:[Ii]ronclad:*:*|i?86:[Ii]ronclad:*:*) + GUESS=$UNAME_MACHINE-pc-ironclad-mlibc + ;; + *:[Ii]ronclad:*:*) + GUESS=$UNAME_MACHINE-unknown-ironclad-mlibc + ;; esac +# Do we have a guess based on uname results? +if test "x$GUESS" != x; then + echo "$GUESS" + exit +fi + # No uname command or uname output not recognized. set_cc_for_build cat > "$dummy.c" < "$dummy.c" </dev/null); + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else @@ -1628,7 +1743,7 @@ main () } EOF -$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. @@ -1636,7 +1751,7 @@ test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } echo "$0: unable to guess system type" >&2 -case "$UNAME_MACHINE:$UNAME_SYSTEM" in +case $UNAME_MACHINE:$UNAME_SYSTEM in mips:Linux | mips64:Linux) # If we got here on MIPS GNU/Linux, output extra information. 
cat >&2 <&2 </dev/null || echo unknown) -uname -r = $( (uname -r) 2>/dev/null || echo unknown) -uname -s = $( (uname -s) 2>/dev/null || echo unknown) -uname -v = $( (uname -v) 2>/dev/null || echo unknown) +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` -/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) -/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` -hostinfo = $( (hostinfo) 2>/dev/null) -/bin/universe = $( (/bin/universe) 2>/dev/null) -/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) -/bin/arch = $( (/bin/arch) 2>/dev/null) -/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) -/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = "$UNAME_MACHINE" UNAME_RELEASE = "$UNAME_RELEASE" @@ -1694,8 +1811,8 @@ fi exit 1 # Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp nil t) # time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-format: "%Y-%02m-%02d" # time-stamp-end: "'" # End: diff --git a/build-aux/config.sub b/build-aux/config.sub index b0f84923..3d35cde1 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,12 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2025 Free Software Foundation, Inc. 
-timestamp='2021-01-07' +# shellcheck disable=SC2006,SC2268,SC2162 # see below for rationale + +timestamp='2025-07-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -50,7 +52,14 @@ timestamp='2021-01-07' # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -me=$(echo "$0" | sed -e 's,.*/,,') +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + +me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS @@ -67,13 +76,13 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2025 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." 
# Parse command line while test $# -gt 0 ; do @@ -111,15 +120,16 @@ case $# in esac # Split fields of configuration type -# shellcheck disable=SC2162 +saved_IFS=$IFS IFS="-" read field1 field2 field3 field4 <&2 + echo "Invalid configuration '$1': more than four components" >&2 exit 1 ;; *-*-*-*) @@ -131,10 +141,22 @@ case $1 in # parts maybe_os=$field2-$field3 case $maybe_os in - nto-qnx* | linux-* | uclinux-uclibc* \ - | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ - | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) + cloudabi*-eabi* \ + | kfreebsd*-gnu* \ + | knetbsd*-gnu* \ + | kopensolaris*-gnu* \ + | ironclad-* \ + | linux-* \ + | managarm-* \ + | netbsd*-eabi* \ + | netbsd*-gnu* \ + | nto-qnx* \ + | os2-emx* \ + | rtmk-nova* \ + | storm-chaos* \ + | uclinux-gnu* \ + | uclinux-uclibc* \ + | windows-* ) basic_machine=$field1 basic_os=$maybe_os ;; @@ -149,8 +171,12 @@ case $1 in esac ;; *-*) - # A lone config we happen to match not fitting any pattern case $field1-$field2 in + # Shorthands that happen to contain a single dash + convex-c[12] | convex-c3[248]) + basic_machine=$field2-convex + basic_os= + ;; decstation-3100) basic_machine=mips-dec basic_os= @@ -158,24 +184,87 @@ case $1 in *-*) # Second component is usually, but not always the OS case $field2 in - # Prevent following clause from handling this valid os + # Do not treat sunos as a manufacturer sun*os*) basic_machine=$field1 basic_os=$field2 ;; # Manufacturers - dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ - | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ - | unicom* | ibm* | next | hp | isi* | apollo | altos* \ - | convergent* | ncr* | news | 32* | 3600* | 3100* \ - | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ - | ultra | tti* | harris | dolphin | highlevel | gould \ - | cbm | ns | masscomp | apple | axis | knuth | cray \ - | microblaze* | sim | cisco \ - | oki | wec | wrs | winbond) + 3100* \ + 
| 32* \ + | 3300* \ + | 3600* \ + | 7300* \ + | acorn \ + | altos* \ + | apollo \ + | apple \ + | atari \ + | att* \ + | axis \ + | be \ + | bull \ + | cbm \ + | ccur \ + | cisco \ + | commodore \ + | convergent* \ + | convex* \ + | cray \ + | crds \ + | dec* \ + | delta* \ + | dg \ + | digital \ + | dolphin \ + | encore* \ + | gould \ + | harris \ + | highlevel \ + | hitachi* \ + | hp \ + | ibm* \ + | intergraph \ + | isi* \ + | knuth \ + | masscomp \ + | microblaze* \ + | mips* \ + | motorola* \ + | ncr* \ + | news \ + | next \ + | ns \ + | oki \ + | omron* \ + | pc533* \ + | rebel \ + | rom68k \ + | rombug \ + | semi \ + | sequent* \ + | sgi* \ + | siemens \ + | sim \ + | sni \ + | sony* \ + | stratus \ + | sun \ + | sun[234]* \ + | tektronix \ + | tti* \ + | ultra \ + | unicom* \ + | wec \ + | winbond \ + | wrs) basic_machine=$field1-$field2 basic_os= ;; + tock* | zephyr*) + basic_machine=$field1-unknown + basic_os=$field2 + ;; *) basic_machine=$field1 basic_os=$field2 @@ -256,26 +345,6 @@ case $1 in basic_machine=arm-unknown basic_os=cegcc ;; - convex-c1) - basic_machine=c1-convex - basic_os=bsd - ;; - convex-c2) - basic_machine=c2-convex - basic_os=bsd - ;; - convex-c32) - basic_machine=c32-convex - basic_os=bsd - ;; - convex-c34) - basic_machine=c34-convex - basic_os=bsd - ;; - convex-c38) - basic_machine=c38-convex - basic_os=bsd - ;; cray) basic_machine=j90-cray basic_os=unicos @@ -698,15 +767,26 @@ case $basic_machine in vendor=dec basic_os=tops20 ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) + delta | 3300 | delta-motorola | 3300-motorola | motorola-delta | motorola-3300) cpu=m68k vendor=motorola ;; - dpx2*) + # This used to be dpx2*, but that gets the RS6000-based + # DPX/20 and the x86-based DPX/2-100 wrong. 
See + # https://oldskool.silicium.org/stations/bull_dpx20.htm + # https://www.feb-patrimoine.com/english/bull_dpx2.htm + # https://www.feb-patrimoine.com/english/unix_and_bull.htm + dpx2 | dpx2[23]00 | dpx2[23]xx) cpu=m68k vendor=bull - basic_os=sysv3 + ;; + dpx2100 | dpx21xx) + cpu=i386 + vendor=bull + ;; + dpx20) + cpu=rs6000 + vendor=bull ;; encore | umax | mmax) cpu=ns32k @@ -769,22 +849,22 @@ case $basic_machine in vendor=hp ;; i*86v32) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv32 ;; i*86v4*) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv4 ;; i*86v) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv ;; i*86sol2) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=solaris2 ;; @@ -821,18 +901,6 @@ case $basic_machine in next | m*-next) cpu=m68k vendor=next - case $basic_os in - openstep*) - ;; - nextstep*) - ;; - ns2*) - basic_os=nextstep2 - ;; - *) - basic_os=nextstep3 - ;; - esac ;; np1) cpu=np1 @@ -917,16 +985,17 @@ case $basic_machine in ;; leon-*|leon[3-9]-*) cpu=sparc - vendor=$(echo "$basic_machine" | sed 's/-.*//') + vendor=`echo "$basic_machine" | sed 's/-.*//'` ;; *-*) - # shellcheck disable=SC2162 + saved_IFS=$IFS IFS="-" read cpu vendor <&2 + echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2 exit 1 ;; esac @@ -1280,38 +1492,48 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if test x$basic_os != x +if test x"$basic_os" != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. 
+obj= case $basic_os in gnu/linux*) kernel=linux - os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') + os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` ;; os2-emx) kernel=os2 - os=$(echo $basic_os | sed -e 's|os2-emx|emx|') + os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` ;; nto-qnx*) kernel=nto - os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') + os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` ;; *-*) - # shellcheck disable=SC2162 + saved_IFS=$IFS IFS="-" read kernel os <&2 + fi + ;; *) - echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2 + exit 1 + ;; +esac + +case $obj in + aout* | coff* | elf* | pe*) + ;; + '') + # empty is fine + ;; + *) + echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2 + exit 1 + ;; +esac + +# Here we handle the constraint that a (synthetic) cpu and os are +# valid only in combination with each other and nowhere else. +case $cpu-$os in + # The "javascript-unknown-ghcjs" triple is used by GHC; we + # accept it here in order to tolerate that, but reject any + # variations. + javascript-ghcjs) + ;; + javascript-* | *-ghcjs) + echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2 exit 1 ;; esac # As a final step for OS-related things, validate the OS-kernel combination # (given a valid OS), if there is a kernel. 
-case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) +case $kernel-$os-$obj in + linux-gnu*- | linux-android*- | linux-dietlibc*- | linux-llvm*- \ + | linux-mlibc*- | linux-musl*- | linux-newlib*- \ + | linux-relibc*- | linux-uclibc*- | linux-ohos*- ) ;; - uclinux-uclibc* ) + uclinux-uclibc*- | uclinux-gnu*- ) ;; - -dietlibc* | -newlib* | -musl* | -uclibc* ) + ironclad-mlibc*-) + ;; + managarm-mlibc*- | managarm-kernel*- ) + ;; + windows*-msvc*-) + ;; + -dietlibc*- | -llvm*- | -mlibc*- | -musl*- | -newlib*- | -relibc*- \ + | -uclibc*- ) # These are just libc implementations, not actual OSes, and thus # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 1>&2 exit 1 ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) + -kernel*- ) + echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2 + exit 1 ;; - nto-qnx*) + *-kernel*- ) + echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2 + exit 1 ;; - os2-emx) + *-msvc*- ) + echo "Invalid configuration '$1': '$os' needs 'windows'." 1>&2 + exit 1 ;; - *-eabi* | *-gnueabi*) + kfreebsd*-gnu*- | knetbsd*-gnu*- | netbsd*-gnu*- | kopensolaris*-gnu*-) ;; - -*) + vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-) + ;; + nto-qnx*-) + ;; + os2-emx-) + ;; + rtmk-nova-) + ;; + *-eabi*- | *-gnueabi*-) + ;; + ios*-simulator- | tvos*-simulator- | watchos*-simulator- ) + ;; + none--*) + # None (no kernel, i.e. freestanding / bare metal), + # can be paired with an machine code file format + ;; + -*-) # Blank kernel with real OS is always fine. ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + --*) + # Blank kernel and OS with real machine code file format is always fine. + ;; + *-*-*) + echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 
1>&2 exit 1 ;; esac @@ -1774,7 +2283,7 @@ case $vendor in *-riscix*) vendor=acorn ;; - *-sunos*) + *-sunos* | *-solaris*) vendor=sun ;; *-cnk* | *-aix*) @@ -1844,12 +2353,12 @@ case $vendor in ;; esac -echo "$cpu-$vendor-${kernel:+$kernel-}$os" +echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}" exit # Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp nil t) # time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-format: "%Y-%02m-%02d" # time-stamp-end: "'" # End: From ce02945070fc569a3fe80ccc01cb5cb6feb4d858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Brooke?= Date: Thu, 4 Sep 2025 08:55:47 +0200 Subject: [PATCH 326/395] Add missing thread_event_registry.c to Visual Studio projects This file was added by b2a35a905f8d3c89529914987407ef33e6b05cec. --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + 4 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9743e10b..fff77a4b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index c1ff11a9..53d4af8d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 6cb1b35e..10514d35 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 5c7b00a2..cda827be 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + From daf44173c54f2e388210bc7f03b4e9bfd938597c Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Thu, 11 Sep 2025 14:46:35 -0700 Subject: [PATCH 327/395] Replace an instance of indentation with spaces with tabs --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 4e9d0bea..12cde70c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -155,7 +155,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ - $(srcroot)src/thread_event_registry.c \ + $(srcroot)src/thread_event_registry.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/util.c \ From 56cdce8592bf4ffd7962bed99b31027f22e1895d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 24 Aug 2025 18:56:12 -0700 Subject: [PATCH 328/395] Adding trace analysis in preparation for page allocator microbenchmark. 
--- .gitignore | 1 + Makefile.in | 19 ++ test/stress/pa/.gitignore | 23 ++ test/stress/pa/data/.gitignore | 6 + test/stress/pa/pa_data_preprocessor.cpp | 423 ++++++++++++++++++++++++ 5 files changed, 472 insertions(+) create mode 100644 test/stress/pa/.gitignore create mode 100644 test/stress/pa/data/.gitignore create mode 100644 test/stress/pa/pa_data_preprocessor.cpp diff --git a/.gitignore b/.gitignore index 9180ddf1..95dbaa5f 100644 --- a/.gitignore +++ b/.gitignore @@ -73,6 +73,7 @@ test/include/test/jemalloc_test_defs.h /test/stress/[A-Za-z]* !/test/stress/[A-Za-z]*.* +!/test/stress/pa/ /test/stress/*.[od] /test/stress/*.out diff --git a/Makefile.in b/Makefile.in index 12cde70c..8ea194e5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -566,6 +566,24 @@ $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TE @mkdir -p $(@D) $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) +$(objroot)test/stress/pa/pa_data_preprocessor$(EXE): $(objroot)test/stress/pa/pa_data_preprocessor.$(O) + @mkdir -p $(@D) + $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + +$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< +ifdef CC_MM + @$(CC) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< +endif + +$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.cpp + @mkdir -p $(@D) + $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< +ifdef CC_MM + @$(CXX) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< +endif + build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) ifeq ($(enable_shared), 1) @@ -686,6 +704,7 @@ 
tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: diff --git a/test/stress/pa/.gitignore b/test/stress/pa/.gitignore new file mode 100644 index 00000000..378ee4e0 --- /dev/null +++ b/test/stress/pa/.gitignore @@ -0,0 +1,23 @@ +# Ignore executable files +pa_microbench +pa_data_preprocessor + +# Ignore object files +*.o +*.d + +# Ignore temporary and backup files +*~ +*.tmp +*.bak + +# Ignore compiled output files +*.out + +# Keep source files and documentation +!*.c +!*.cpp +!*.h +!*.md +!*.sh +!Makefile* diff --git a/test/stress/pa/data/.gitignore b/test/stress/pa/data/.gitignore new file mode 100644 index 00000000..3b8ddcbb --- /dev/null +++ b/test/stress/pa/data/.gitignore @@ -0,0 +1,6 @@ +# Ignore data files +*.csv + +# But keep example files +!example_*.csv +!*.md diff --git a/test/stress/pa/pa_data_preprocessor.cpp b/test/stress/pa/pa_data_preprocessor.cpp new file mode 100644 index 00000000..757f37bb --- /dev/null +++ b/test/stress/pa/pa_data_preprocessor.cpp @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Page Allocator Data Preprocessor (C++ Version) + * + * This tool processes real allocation traces (collected via BPF) + * and converts them into a format suitable for the PA simulator. 
+ * + * Supported input formats: + * HPA: shard_ind_int,addr_int,nsecs_int,probe,size_int + * SEC: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int + * + * Output format (4 columns): + * shard_ind_int,operation_index,size_or_alloc_index,is_frequent + * where: + * - shard_ind_int: shard index as integer + * - operation_index: 0=alloc, 1=dalloc + * - size_or_alloc_index: for alloc operations show bytes, + * for dalloc operations show index of corresponding alloc + * - is_frequent: 1 if frequent reuse allocation, 0 otherwise + */ + +enum class TraceFormat { HPA, SEC }; + +struct TraceEvent { + int shard_ind; + uintptr_t addr; + uint64_t nsecs; + std::string probe; + size_t size; + bool is_frequent; +}; + +struct AllocationRecord { + uintptr_t addr; + size_t size; + int shard_ind; + size_t alloc_index; + uint64_t nsecs; +}; + +class AllocationTracker { + private: + std::unordered_map records_; + + public: + void + add_allocation(uintptr_t addr, size_t size, int shard_ind, + size_t alloc_index, uint64_t nsecs) { + records_[addr] = {addr, size, shard_ind, alloc_index, nsecs}; + } + + AllocationRecord * + find_allocation(uintptr_t addr) { + auto it = records_.find(addr); + return (it != records_.end()) ? &it->second : nullptr; + } + + void + remove_allocation(uintptr_t addr) { + records_.erase(addr); + } + + size_t + count() const { + return records_.size(); + } +}; + +class ArenaMapper { + private: + std::unordered_map sec_ptr_to_arena_; + int next_arena_index_; + + public: + ArenaMapper() : next_arena_index_(0) {} + + int + get_arena_index(uintptr_t sec_ptr) { + if (sec_ptr == 0) { + /* Should not be seeing null sec pointer anywhere. 
Use this as a sanity check.*/ + return 0; + } + + auto it = sec_ptr_to_arena_.find(sec_ptr); + if (it != sec_ptr_to_arena_.end()) { + return it->second; + } + + /* New sec_ptr, assign next available arena index */ + int arena_index = next_arena_index_++; + sec_ptr_to_arena_[sec_ptr] = arena_index; + return arena_index; + } + + size_t + arena_count() const { + return sec_ptr_to_arena_.size(); + } +}; + +bool +is_alloc_operation(const std::string &probe) { + return (probe == "hpa_alloc" || probe == "sec_alloc"); +} + +bool +is_dalloc_operation(const std::string &probe) { + return (probe == "hpa_dalloc" || probe == "sec_dalloc"); +} + +bool +parse_hpa_line(const std::string &line, TraceEvent &event) { + std::istringstream ss(line); + std::string token; + + /* Parse shard_ind_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.shard_ind = std::stoi(token); + + /* Parse addr_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.addr = std::stoull(token); + + /* Parse nsecs_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.nsecs = std::stoull(token); + + /* Parse probe */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.probe = token; + + /* Parse size_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.size = std::stoull(token); + + /* HPA format doesn't have is_frequent field, set default */ + event.is_frequent = true; + + return false; +} + +bool +parse_sec_line( + const std::string &line, TraceEvent &event, ArenaMapper &arena_mapper) { + std::istringstream ss(line); + std::string token; + + /* Skip process_id */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Skip thread_id */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Skip thread_name */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Parse nsecs_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.nsecs = std::stoull(token); + + /* Parse 
operation */ + if (!std::getline(ss, token, ',')) { + return true; + } + + event.probe = token; + + /* Parse sec_ptr_int (used for arena mapping) */ + uintptr_t sec_ptr = 0; + if (!std::getline(ss, token, ',')) { + return true; + } + if (!token.empty()) { + sec_ptr = std::stoull(token); + } + + /* Map sec_ptr to arena index */ + event.shard_ind = arena_mapper.get_arena_index(sec_ptr); + + /* Skip sec_shard_ptr_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Parse edata_ptr_int (used as the address) */ + if (!std::getline(ss, token, ',')) { + return true; + } + if (!token.empty()) { + event.addr = std::stoull(token); + } else { + event.addr = 0; + } + + /* Parse size_int */ + if (!std::getline(ss, token, ',') + && !is_dalloc_operation(event.probe)) { + /* SEC format may not always have size for dalloc */ + return true; + } + if (!token.empty()) { + event.size = std::stoull(token); + } else { + /* When no size given, this is a dalloc, size won't be used. */ + event.size = 0; + } + + /* Parse is_frequent_reuse_int */ + if (!std::getline(ss, token, ',') + && !is_dalloc_operation(event.probe)) { + return true; + } + if (!token.empty()) { + event.is_frequent = (std::stoi(token) != 0); + } else { + /* + * When no is_frequent_reuse_int given, this is a dalloc, + * is_frequent won't be used. + */ + event.is_frequent = false; + } + + return false; +} + +void +write_output_header(std::ofstream &output) { + output << "shard_ind,operation,size_or_alloc_index,is_frequent\n"; +} + +void +write_output_event(std::ofstream &output, int shard_ind, int operation, + size_t value, bool is_frequent) { + output << shard_ind << "," << operation << "," << value << "," + << (is_frequent ? 
1 : 0) << "\n"; +} + +size_t +process_trace_file(const std::string &input_filename, + const std::string &output_filename, TraceFormat format) { + std::ifstream input(input_filename); + if (!input.is_open()) { + std::cerr << "Failed to open input file: " << input_filename + << std::endl; + return 0; + } + + std::ofstream output(output_filename); + if (!output.is_open()) { + std::cerr << "Failed to open output file: " << output_filename + << std::endl; + return 0; + } + + AllocationTracker tracker; + ArenaMapper arena_mapper; /* For SEC format arena mapping */ + + std::string line; + size_t line_count = 0; + size_t output_count = 0; + size_t alloc_sequence = 0; /* Sequential index for allocations */ + size_t unmatched_frees = 0; + + write_output_header(output); + std::cout << "Reading from: " << input_filename << std::endl; + + /* Skip header line */ + if (!std::getline(input, line)) { + std::cerr << "Error: Empty input file" << std::endl; + return 0; + } + + while (std::getline(input, line)) { + line_count++; + + /* Skip empty lines */ + if (line.empty()) { + continue; + } + + TraceEvent event; + bool parse_error = false; + + if (format == TraceFormat::HPA) { + parse_error = parse_hpa_line(line, event); + } else if (format == TraceFormat::SEC) { + parse_error = parse_sec_line(line, event, arena_mapper); + } + + if (parse_error) { + continue; + } + + if (is_alloc_operation(event.probe)) { + /* This is an allocation */ + write_output_event(output, event.shard_ind, 0, + event.size, event.is_frequent); + + /* Track this allocation with the current sequence number */ + tracker.add_allocation(event.addr, event.size, + event.shard_ind, alloc_sequence, event.nsecs); + alloc_sequence++; + } else if (is_dalloc_operation(event.probe)) { + /* This is a deallocation. Ignore dalloc without a corresponding alloc. 
*/ + AllocationRecord *record = tracker.find_allocation( + event.addr); + + if (record) { + /* Validate timing: deallocation should happen after allocation */ + assert(event.nsecs >= record->nsecs); + /* Found matching allocation with valid timing */ + write_output_event(output, event.shard_ind, 1, + record->alloc_index, event.is_frequent); + tracker.remove_allocation(event.addr); + output_count++; /* Count this deallocation */ + } else { + unmatched_frees++; + } + } else { + std::cerr << "Unknown operation: " << event.probe + << std::endl; + } + } + + std::cout << "Processed " << line_count << " lines" << std::endl; + std::cout << "Unmatched frees: " << unmatched_frees << std::endl; + std::cout << "Extracted " << output_count << " alloc/dalloc pairs" + << std::endl; + std::cout << "Results written to: " << output_filename << std::endl; + + return output_count; +} + +TraceFormat +parse_format(const std::string &format_str) { + if (format_str == "hpa") { + return TraceFormat::HPA; + } else if (format_str == "sec") { + return TraceFormat::SEC; + } else { + throw std::invalid_argument( + "Unknown format: " + format_str + ". 
Use 'hpa' or 'sec'"); + } +} + +int +main(int argc, char *argv[]) { + if (argc < 4 || argc > 5) { + std::cerr << "Usage: " << argv[0] + << " " + << std::endl; + std::cerr << std::endl; + std::cerr << "Arguments:" << std::endl; + std::cerr << " format - Input format: 'hpa' or 'sec'" + << std::endl; + std::cerr + << " hpa: shard_ind_int,addr_int,nsecs_int,probe,size_int" + << std::endl; + std::cerr + << " sec: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int" + << std::endl; + std::cerr << " input_csv_file - Input CSV trace file" + << std::endl; + std::cerr + << " output_file - Output file for simulator with format:" + << std::endl; + std::cerr + << " shard_ind,operation,size_or_alloc_index,is_frequent" + << std::endl; + std::cerr << std::endl; + std::cerr << "Output format:" << std::endl; + std::cerr << " - operation: 0=alloc, 1=dalloc" << std::endl; + std::cerr + << " - size_or_alloc_index: bytes for alloc, alloc index for dalloc" + << std::endl; + return 1; + } + + try { + TraceFormat format = parse_format(argv[1]); + std::string input_file = argv[2]; + std::string output_file = argv[3]; + + size_t events_generated = process_trace_file( + input_file, output_file, format); + + if (events_generated == 0) { + std::cerr + << "No events generated. Check input file format and filtering criteria." + << std::endl; + return 1; + } + return 0; + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} From 261591f12360fbce99440584a611e9c338ff7378 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 24 Aug 2025 23:57:26 -0700 Subject: [PATCH 329/395] Add a page-allocator microbenchmark. 
--- Makefile.in | 10 +- test/stress/pa/README.md | 118 ++++++ test/stress/pa/pa_microbench.c | 672 +++++++++++++++++++++++++++++++++ 3 files changed, 797 insertions(+), 3 deletions(-) create mode 100644 test/stress/pa/README.md create mode 100644 test/stress/pa/pa_microbench.c diff --git a/Makefile.in b/Makefile.in index 8ea194e5..047e05cb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -570,11 +570,15 @@ $(objroot)test/stress/pa/pa_data_preprocessor$(EXE): $(objroot)test/stress/pa/pa @mkdir -p $(@D) $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) +$(objroot)test/stress/pa/pa_microbench$(EXE): $(objroot)test/stress/pa/pa_microbench.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) + @mkdir -p $(@D) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.c @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< + $(CC) $(CFLAGS) -c $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< ifdef CC_MM - @$(CC) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< + @$(CC) -MM $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< endif $(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.cpp @@ -704,7 +708,7 @@ tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) -tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) +tests_pa: 
$(objroot)test/stress/pa/pa_data_preprocessor$(EXE) $(objroot)test/stress/pa/pa_microbench$(EXE) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: diff --git a/test/stress/pa/README.md b/test/stress/pa/README.md new file mode 100644 index 00000000..4ae59bb9 --- /dev/null +++ b/test/stress/pa/README.md @@ -0,0 +1,118 @@ +# Page Allocator (PA) Microbenchmark Suite + +This directory contains a comprehensive microbenchmark suite for testing and analyzing jemalloc's Page Allocator (PA) system, including the Hugepage-aware Page Allocator (HPA) and Slab Extent Cache (SEC) components. + +## Overview + +The PA microbenchmark suite consists of two main programs designed to preprocess allocation traces and replay them against jemalloc's internal PA system to measure performance, memory usage, and allocation patterns. + +To summarize how to run it, assume we have a file `test/stress/pa/data/hpa.csv` collected from a real application using USDT, the simulation can be run as follows: +``` +make tests_pa +./test/stress/pa/pa_data_preprocessor hpa test/stress/pa/data/hpa.csv test/stress/pa/data/sample_hpa_output.csv +./test/stress/pa/pa_microbench -p -o test/stress/pa/data/sample_hpa_stats.csv test/stress/pa/data/sample_hpa_output.csv +``` + +If it's sec, simply replace the first parameter passed to `pa_data_preprocessor` with sec. 
+ +## Architecture + +### PA System Components + +The Page Allocator sits at the core of jemalloc's memory management hierarchy: + +``` +Application + ↓ +Arena (tcache, bins) + ↓ +PA (Page Allocator) ← This is what we benchmark + ├── HPA (Hugepage-aware Page Allocator) + └── SEC (Slab Extent Cache) + ↓ +Extent Management (emap, edata) + ↓ +Base Allocator + ↓ +OS (mmap/munmap) +``` + +### Microbenchmark Architecture + +``` +Raw Allocation Traces + ↓ +[pa_data_preprocessor] ← Preprocesses and filters traces + ↓ +CSV alloc/dalloc Files + ↓ +[pa_microbench] ← Replays against real PA system + ↓ +Performance Statistics & Analysis +``` + +## Programs + +### 1. pa_data_preprocessor + +A C++ data preprocessing tool that converts raw allocation traces into a standardized CSV format suitable for microbenchmarking. + +**Purpose:** +- Parse and filter raw allocation trace data +- Convert various trace formats to standardized CSV +- Filter by process ID, thread ID, or other criteria +- Validate and clean allocation/deallocation sequences + +### 2. pa_microbench + +A C microbenchmark that replays allocation traces against jemalloc's actual PA system to measure performance and behavior with HPA statistics collection. 
+ +**Purpose:** +- Initialize real PA infrastructure (HPA, SEC, base allocators, emaps) +- Replay allocation/deallocation sequences from CSV traces +- Measure allocation latency, memory usage, and fragmentation +- Test different PA configurations (HPA-only vs HPA+SEC) +- Generate detailed HPA internal statistics + +**Key Features:** +- **Real PA Integration**: Uses jemalloc's actual PA implementation, not simulation +- **Multi-shard Support**: Tests allocation patterns across multiple PA shards +- **Configurable Modes**: Supports HPA-only mode (`-p`) and HPA+SEC mode (`-s`) +- **Statistics Output**: Detailed per-shard statistics and timing data +- **Configurable Intervals**: Customizable statistics output frequency (`-i/--interval`) + +## Building + +### Compilation + +```bash +# Build both PA microbenchmark tools +cd /path/to/jemalloc +make tests_pa +``` + +This creates: +- `test/stress/pa/pa_data_preprocessor` - Data preprocessing tool +- `test/stress/pa/pa_microbench` - PA microbenchmark + +## Usage + +### Data Preprocessing + +```bash +# Basic preprocessing +./test/stress/pa/pa_data_preprocessor input_trace.txt output.csv +``` + +### Microbenchmark Execution + +```bash +# Run with HPA + SEC (default mode) +./test/stress/pa/pa_microbench -s -o stats.csv trace.csv + +# Run with HPA-only (no SEC) +./test/stress/pa/pa_microbench -p -o stats.csv trace.csv + +# Show help +./test/stress/pa/pa_microbench -h +``` diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c new file mode 100644 index 00000000..4ad3652d --- /dev/null +++ b/test/stress/pa/pa_microbench.c @@ -0,0 +1,672 @@ +#include "test/jemalloc_test.h" + +/* Additional includes for PA functionality */ +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/sec.h" 
+#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/psset.h" + +/* + * PA Microbenchmark (Simplified Version) + * + * This tool reads allocation traces and simulates PA behavior + * for testing and understanding the allocation patterns. + * + * Features: + * 1. Reads CSV input file with format: shard_ind,operation,size_or_alloc_index,is_frequent + * 2. Simulates allocations/deallocations tracking + * 3. Provides basic statistics analysis + * 4. Validates the framework setup + */ + +#define MAX_LINE_LENGTH 1024 +#define MAX_ALLOCATIONS 10000000 +#define MAX_ARENAS 128 + +typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t; + +typedef struct { + int shard_ind; + pa_op_t operation; + size_t size_or_alloc_index; + int is_frequent; +} pa_event_t; + +typedef struct { + edata_t *edata; + size_t size; + int shard_ind; + bool active; +} allocation_record_t; + +/* Structure to group per-shard tracking statistics */ +typedef struct { + uint64_t alloc_count; /* Number of allocations */ + uint64_t dealloc_count; /* Number of deallocations */ + uint64_t bytes_allocated; /* Current bytes allocated */ +} shard_stats_t; + +/* Structure to group per-shard PA infrastructure */ +typedef struct { + base_t *base; /* Base allocator */ + emap_t emap; /* Extent map */ + pa_shard_t pa_shard; /* PA shard */ + pa_shard_stats_t shard_stats; /* PA shard statistics */ + malloc_mutex_t stats_mtx; /* Statistics mutex */ +} shard_infrastructure_t; + +static FILE *g_stats_output = NULL; /* Output file for stats */ +static size_t g_alloc_counter = 0; /* Global allocation counter */ +static allocation_record_t *g_alloc_records = + NULL; /* Global allocation tracking */ +static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */ + +/* Refactored arrays using structures */ +static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */ +static shard_infrastructure_t *g_shard_infra = + NULL; /* Per-shard PA infrastructure */ +static pa_central_t g_pa_central; /* 
Global PA central */ + +static void cleanup_pa_infrastructure(int num_shards); + +static bool +initialize_pa_infrastructure(int num_shards) { + /* + * Note when we call malloc, it resolves to je_malloc, while internal + * functions like base_new resolve to jet_malloc. This is because this + * file is compiled with -DJEMALLOC_JET as a test. This allows us to + * completely isolate the PA infrastructure benchmark from the rest of + * the jemalloc usage. + */ + void *dummy_jet = jet_malloc(16); + if (dummy_jet == NULL) { + fprintf(stderr, "Failed to initialize JET jemalloc\n"); + return 1; + } + + /* Force JET system to be fully initialized */ + if (jet_mallctl("epoch", NULL, NULL, NULL, 0) != 0) { + fprintf(stderr, "Failed to initialize JET system fully\n"); + jet_free(dummy_jet); + return 1; + } + jet_free(dummy_jet); + + /* Allocate shard tracking statistics */ + g_shard_stats = calloc(num_shards, sizeof(shard_stats_t)); + if (g_shard_stats == NULL) { + printf("DEBUG: Failed to allocate shard stats\n"); + return true; + } + + /* Allocate shard infrastructure */ + g_shard_infra = calloc(num_shards, sizeof(shard_infrastructure_t)); + if (g_shard_infra == NULL) { + printf("DEBUG: Failed to allocate shard infrastructure\n"); + free(g_shard_stats); + return true; + } + + /* Initialize one base allocator for PA central */ + base_t *central_base = base_new(tsd_tsdn(tsd_fetch()), 0 /* ind */, + (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); + if (central_base == NULL) { + printf("DEBUG: Failed to create central_base\n"); + free(g_shard_stats); + free(g_shard_infra); + return true; + } + + /* Initialize PA central with HPA enabled */ + if (pa_central_init(&g_pa_central, central_base, true /* hpa */, + &hpa_hooks_default)) { + printf("DEBUG: Failed to initialize PA central\n"); + base_delete(tsd_tsdn(tsd_fetch()), central_base); + free(g_shard_stats); + free(g_shard_infra); + return true; + } + + for (int i = 0; i < num_shards; i++) { + /* 
Create a separate base allocator for each shard */ + g_shard_infra[i].base = base_new(tsd_tsdn(tsd_fetch()), + i /* ind */, (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); + if (g_shard_infra[i].base == NULL) { + printf("DEBUG: Failed to create base %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize emap for this shard */ + if (emap_init(&g_shard_infra[i].emap, g_shard_infra[i].base, + /* zeroed */ false)) { + printf("DEBUG: Failed to initialize emap %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize stats mutex */ + if (malloc_mutex_init(&g_shard_infra[i].stats_mtx, + "pa_shard_stats", WITNESS_RANK_OMIT, + malloc_mutex_rank_exclusive)) { + printf( + "DEBUG: Failed to initialize stats mutex %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize PA shard */ + nstime_t cur_time; + nstime_init_zero(&cur_time); + + if (pa_shard_init(tsd_tsdn(tsd_fetch()), + &g_shard_infra[i].pa_shard, &g_pa_central, + &g_shard_infra[i].emap /* emap */, + g_shard_infra[i].base, i /* ind */, + &g_shard_infra[i].shard_stats /* stats */, + &g_shard_infra[i].stats_mtx /* stats_mtx */, + &cur_time /* cur_time */, + SIZE_MAX /* oversize_threshold */, + -1 /* dirty_decay_ms */, -1 /* muzzy_decay_ms */)) { + printf("DEBUG: Failed to initialize PA shard %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Enable HPA for this shard with proper configuration */ + hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT; + hpa_opts.deferral_allowed = + false; /* No background threads in microbench */ + + sec_opts_t sec_opts = SEC_OPTS_DEFAULT; + if (!g_use_sec) { + /* Disable SEC by setting nshards to 0 */ + sec_opts.nshards = 0; + } + + if 
(pa_shard_enable_hpa(tsd_tsdn(tsd_fetch()), + &g_shard_infra[i].pa_shard, &hpa_opts, &sec_opts)) { + fprintf( + stderr, "Failed to enable HPA on shard %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + } + + printf("PA infrastructure configured: HPA=enabled, SEC=%s\n", + g_use_sec ? "enabled" : "disabled"); + + return false; +} + +static void +cleanup_pa_infrastructure(int num_shards) { + if (g_shard_infra != NULL) { + for (int i = 0; i < num_shards; i++) { + pa_shard_destroy( + tsd_tsdn(tsd_fetch()), &g_shard_infra[i].pa_shard); + if (g_shard_infra[i].base != NULL) { + base_delete(tsd_tsdn(tsd_fetch()), + g_shard_infra[i].base); + } + } + free(g_shard_infra); + g_shard_infra = NULL; + } + + if (g_shard_stats != NULL) { + free(g_shard_stats); + g_shard_stats = NULL; + } +} + +static bool +parse_csv_line(const char *line, pa_event_t *event) { + /* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */ + int operation; + int fields = sscanf(line, "%d,%d,%zu,%d", &event->shard_ind, &operation, + &event->size_or_alloc_index, &event->is_frequent); + + if (fields < 3) { /* is_frequent is optional */ + return false; + } + + if (fields == 3) { + event->is_frequent = 0; /* Default value */ + } + + if (operation == 0) { + event->operation = PA_ALLOC; + } else if (operation == 1) { + event->operation = PA_DALLOC; + } else { + return false; + } + + return true; +} + +static size_t +load_trace_file(const char *filename, pa_event_t **events, int *max_shard_id) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open trace file: %s\n", filename); + return 0; + } + + *events = malloc(MAX_ALLOCATIONS * sizeof(pa_event_t)); + if (!*events) { + fclose(file); + return 0; + } + + char line[MAX_LINE_LENGTH]; + size_t count = 0; + *max_shard_id = 0; + + /* Skip header line */ + if (fgets(line, sizeof(line), file) == NULL) { + fclose(file); + free(*events); + return 0; + } + 
+ while (fgets(line, sizeof(line), file) && count < MAX_ALLOCATIONS) { + if (parse_csv_line(line, &(*events)[count])) { + if ((*events)[count].shard_ind > *max_shard_id) { + *max_shard_id = (*events)[count].shard_ind; + } + count++; + } + } + + fclose(file); + printf("Loaded %zu events from %s\n", count, filename); + printf("Maximum shard ID found: %d\n", *max_shard_id); + return count; +} + +static void +collect_hpa_stats(int shard_id, hpa_shard_stats_t *hpa_stats_out) { + /* Get tsdn for statistics collection */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* Clear the output structure */ + memset(hpa_stats_out, 0, sizeof(hpa_shard_stats_t)); + + /* Check if this shard has HPA enabled */ + if (!g_shard_infra[shard_id].pa_shard.ever_used_hpa) { + return; + } + + /* Merge HPA statistics from the shard */ + hpa_shard_stats_merge( + tsdn, &g_shard_infra[shard_id].pa_shard.hpa_shard, hpa_stats_out); +} + +static void +print_shard_stats(int shard_id, size_t operation_count) { + if (!g_stats_output) { + return; + } + + /* Collect HPA statistics */ + hpa_shard_stats_t hpa_stats; + collect_hpa_stats(shard_id, &hpa_stats); + psset_stats_t *psset_stats = &hpa_stats.psset_stats; + + /* Total pageslabs */ + size_t total_pageslabs = psset_stats->merged.npageslabs; + + /* Full pageslabs breakdown by hugification */ + size_t full_pageslabs_non_huge = + psset_stats->full_slabs[0].npageslabs; /* [0] = non-hugified */ + size_t full_pageslabs_huge = + psset_stats->full_slabs[1].npageslabs; /* [1] = hugified */ + size_t full_pageslabs_total = full_pageslabs_non_huge + + full_pageslabs_huge; + + /* Empty pageslabs breakdown by hugification */ + size_t empty_pageslabs_non_huge = + psset_stats->empty_slabs[0].npageslabs; /* [0] = non-hugified */ + size_t empty_pageslabs_huge = + psset_stats->empty_slabs[1].npageslabs; /* [1] = hugified */ + size_t empty_pageslabs_total = empty_pageslabs_non_huge + + empty_pageslabs_huge; + + /* Hugified pageslabs (full + empty + partial) */ + size_t 
hugified_pageslabs = full_pageslabs_huge + empty_pageslabs_huge; + /* Add hugified partial slabs */ + for (int i = 0; i < PSSET_NPSIZES; i++) { + hugified_pageslabs += + psset_stats->nonfull_slabs[i][1].npageslabs; + } + + /* Dirty bytes */ + size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + + /* Output enhanced stats with detailed breakdown */ + fprintf(g_stats_output, + "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu\n", + operation_count, shard_id, g_shard_stats[shard_id].alloc_count, + g_shard_stats[shard_id].dealloc_count, + g_shard_stats[shard_id].bytes_allocated, total_pageslabs, + full_pageslabs_total, empty_pageslabs_total, hugified_pageslabs, + full_pageslabs_non_huge, full_pageslabs_huge, + empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes, + hpa_stats.nonderived_stats.nhugifies, + hpa_stats.nonderived_stats.nhugify_failures, + hpa_stats.nonderived_stats.ndehugifies); + fflush(g_stats_output); +} + +static void +simulate_trace( + int num_shards, pa_event_t *events, size_t count, size_t stats_interval) { + uint64_t total_allocs = 0, total_deallocs = 0; + uint64_t total_allocated_bytes = 0; + + printf("Starting simulation with %zu events across %d shards...\n", + count, num_shards); + + for (size_t i = 0; i < count; i++) { + pa_event_t *event = &events[i]; + + /* Validate shard index */ + if (event->shard_ind >= num_shards) { + fprintf(stderr, + "Warning: Invalid shard index %d (max %d)\n", + event->shard_ind, num_shards - 1); + continue; + } + + switch (event->operation) { + case PA_ALLOC: { + size_t size = event->size_or_alloc_index; + + /* Get tsdn and calculate parameters for PA */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + szind_t szind = sz_size2index(size); + bool slab = + event + ->is_frequent; /* Use frequent_reuse for slab */ + bool deferred_work_generated = false; + + /* Allocate using PA allocator */ + edata_t *edata = pa_alloc(tsdn, + &g_shard_infra[event->shard_ind].pa_shard, size, + PAGE /* alignment */, 
slab, szind, false /* zero */, + false /* guarded */, &deferred_work_generated); + + if (edata != NULL) { + /* Store allocation record */ + g_alloc_records[g_alloc_counter].edata = edata; + g_alloc_records[g_alloc_counter].size = size; + g_alloc_records[g_alloc_counter].shard_ind = + event->shard_ind; + g_alloc_records[g_alloc_counter].active = true; + g_alloc_counter++; + + /* Update shard-specific stats */ + g_shard_stats[event->shard_ind].alloc_count++; + g_shard_stats[event->shard_ind] + .bytes_allocated += size; + + total_allocs++; + total_allocated_bytes += size; + } + break; + } + case PA_DALLOC: { + size_t alloc_index = event->size_or_alloc_index; + if (alloc_index < g_alloc_counter + && g_alloc_records[alloc_index].active + && g_alloc_records[alloc_index].shard_ind + == event->shard_ind) { + /* Get tsdn for PA */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated = false; + + /* Deallocate using PA allocator */ + pa_dalloc(tsdn, + &g_shard_infra[event->shard_ind].pa_shard, + g_alloc_records[alloc_index].edata, + &deferred_work_generated); + + /* Update shard-specific stats */ + g_shard_stats[event->shard_ind].dealloc_count++; + g_shard_stats[event->shard_ind] + .bytes_allocated -= + g_alloc_records[alloc_index].size; + + g_alloc_records[alloc_index].active = false; + total_deallocs++; + } + break; + } + } + + /* Periodic stats output and progress reporting */ + if (stats_interval > 0 && (i + 1) % stats_interval == 0) { + /* Print stats for all shards */ + for (int j = 0; j < num_shards; j++) { + print_shard_stats(j, i + 1); + } + } + } + + printf("\nSimulation completed:\n"); + printf(" Total allocations: %lu\n", total_allocs); + printf(" Total deallocations: %lu\n", total_deallocs); + printf(" Total allocated: %lu bytes\n", total_allocated_bytes); + printf(" Active allocations: %lu\n", g_alloc_counter - total_deallocs); + + /* Print final stats for all shards */ + printf("\nFinal shard statistics:\n"); + for (int i = 0; i < num_shards; 
i++) { + printf( + " Shard %d: Allocs=%lu, Deallocs=%lu, Active Bytes=%lu\n", + i, g_shard_stats[i].alloc_count, + g_shard_stats[i].dealloc_count, + g_shard_stats[i].bytes_allocated); + + /* Final stats to file */ + print_shard_stats(i, count); + } +} + +static void +cleanup_remaining_allocations(int num_shards) { + size_t cleaned_up = 0; + + printf("Cleaning up remaining allocations...\n"); + + for (size_t i = 0; i < g_alloc_counter; i++) { + if (g_alloc_records[i].active) { + int shard_ind = g_alloc_records[i].shard_ind; + if (shard_ind < num_shards) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated = false; + + pa_dalloc(tsdn, + &g_shard_infra[shard_ind].pa_shard, + g_alloc_records[i].edata, + &deferred_work_generated); + + g_alloc_records[i].active = false; + cleaned_up++; + } + } + } + + printf("Cleaned up %zu remaining allocations\n", cleaned_up); +} + +static void +print_usage(const char *program) { + printf("Usage: %s [options] \n", program); + printf("Options:\n"); + printf(" -h, --help Show this help message\n"); + printf( + " -o, --output FILE Output file for statistics (default: stdout)\n"); + printf(" -s, --sec Use SEC (default)\n"); + printf(" -p, --hpa-only Use HPA only (no SEC)\n"); + printf( + " -i, --interval N Stats print interval (default: 100000, 0=disable)\n"); + printf( + "\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n"); + printf(" - operation: 0=alloc, 1=dealloc\n"); + printf(" - is_frequent: optional column\n"); +} + +int +main(int argc, char *argv[]) { + const char *trace_file = NULL; + const char *stats_output_file = NULL; + size_t stats_interval = 100000; /* Default stats print interval */ + /* Parse command line arguments */ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-h") == 0 + || strcmp(argv[i], "--help") == 0) { + print_usage(argv[0]); + return 0; + } else if (strcmp(argv[i], "-o") == 0 + || strcmp(argv[i], "--output") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, 
+ "Error: %s requires an argument\n", + argv[i]); + return 1; + } + stats_output_file = argv[++i]; + } else if (strcmp(argv[i], "-s") == 0 + || strcmp(argv[i], "--sec") == 0) { + g_use_sec = true; + } else if (strcmp(argv[i], "-p") == 0 + || strcmp(argv[i], "--hpa-only") == 0) { + g_use_sec = false; + } else if (strcmp(argv[i], "-i") == 0 + || strcmp(argv[i], "--interval") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, + "Error: %s requires an argument\n", + argv[i]); + return 1; + } + stats_interval = (size_t)atol(argv[++i]); + } else if (argv[i][0] != '-') { + trace_file = argv[i]; + } else { + fprintf(stderr, "Unknown option: %s\n", argv[i]); + print_usage(argv[0]); + return 1; + } + } + + if (!trace_file) { + fprintf(stderr, "Error: No trace file specified\n"); + print_usage(argv[0]); + return 1; + } + + printf("Trace file: %s\n", trace_file); + printf("Mode: %s\n", g_use_sec ? "PA with SEC" : "HPA only"); + + /* Open stats output file */ + if (stats_output_file) { + g_stats_output = fopen(stats_output_file, "w"); + if (!g_stats_output) { + fprintf(stderr, + "Failed to open stats output file: %s\n", + stats_output_file); + return 1; + } + printf("Stats output: %s\n", stats_output_file); + + /* Write CSV header */ + fprintf(g_stats_output, + "operation_count,shard_id,alloc_count,dealloc_count,active_bytes," + "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs," + "full_pageslabs_non_huge,full_pageslabs_huge," + "empty_pageslabs_non_huge,empty_pageslabs_huge," + "dirty_bytes,nhugifies,nhugify_failures,ndehugifies\n"); + } + + /* Load trace data and determine max number of arenas */ + pa_event_t *events; + int max_shard_id; + size_t event_count = load_trace_file( + trace_file, &events, &max_shard_id); + if (event_count == 0) { + if (g_stats_output) + fclose(g_stats_output); + return 1; + } + + int num_shards = max_shard_id + 1; /* shard IDs are 0-based */ + if (num_shards > MAX_ARENAS) { + fprintf(stderr, "Error: Too many arenas 
required (%d > %d)\n", + num_shards, MAX_ARENAS); + free(events); + if (g_stats_output) + fclose(g_stats_output); + return 1; + } + + /* Allocate allocation tracking array */ + g_alloc_records = malloc(event_count * sizeof(allocation_record_t)); + + if (!g_alloc_records) { + fprintf( + stderr, "Failed to allocate allocation tracking array\n"); + free(events); + if (g_stats_output) { + fclose(g_stats_output); + } + return 1; + } + + /* Initialize PA infrastructure */ + if (initialize_pa_infrastructure(num_shards)) { + fprintf(stderr, "Failed to initialize PA infrastructure\n"); + free(events); + free(g_alloc_records); + if (g_stats_output) { + fclose(g_stats_output); + } + return 1; + } + + /* Run simulation */ + simulate_trace(num_shards, events, event_count, stats_interval); + + /* Clean up remaining allocations */ + cleanup_remaining_allocations(num_shards); + + /* Cleanup PA infrastructure */ + cleanup_pa_infrastructure(num_shards); + + /* Cleanup */ + free(g_alloc_records); + free(events); + + if (g_stats_output) { + fclose(g_stats_output); + printf("Statistics written to: %s\n", stats_output_file); + } + + return 0; +} From 67435187d103a9bef7995be3d625712329578e64 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Fri, 12 Sep 2025 18:07:12 -0700 Subject: [PATCH 330/395] Improve the portability of grep patterns in configure.ac The configure.ac script uses backslash plus in its grep patterns to match one or more occurrences. This is a GNU grep extension to the Basic Regular Expressions syntax that fails on systems with a more traditional grep. This change fixes grep patterns that use backslash plus to use a star instead. 
Closes: #2777 --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8ea092d6..5e907511 100644 --- a/configure.ac +++ b/configure.ac @@ -652,7 +652,7 @@ AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], [Version string])], [ - echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + echo "${with_version}" | grep ['^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*-[0-9][0-9]*-g[0-9a-f][0-9a-f]*$'] 2>&1 1>/dev/null if test $? -eq 0 ; then echo "$with_version" > "${objroot}VERSION" else @@ -2059,7 +2059,7 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then dnl Hugepagesize: 2048 kB if test -e "/proc/meminfo" ; then hpsk=[`cat /proc/meminfo 2>/dev/null | \ - grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \ + grep '^Hugepagesize:[[:space:]][[:space:]]*[0-9][0-9]*[[:space:]]kB$' | \ awk '{print $2}'`] if test "x${hpsk}" != "x" ; then je_cv_lg_hugepage=10 From d70882a05d02e21c27990d4c6deb5c5bf614d9ec Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 7 Aug 2025 09:34:30 -0700 Subject: [PATCH 331/395] [sdt] Add some tracepoints to sec and hpa modules --- src/hpa.c | 8 ++++++++ src/hpdata.c | 5 ----- src/sec.c | 5 +++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index e297e411..d848b1ed 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" +#include "jemalloc/internal/jemalloc_probe.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) @@ -752,6 +753,8 @@ hpa_try_alloc_one_no_grow( } void *addr = hpdata_reserve_alloc(ps, size); + JE_USDT(hpa_alloc, 5, shard->ind, addr, size, hpdata_nactive_get(ps), + hpdata_age_get(ps)); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, @@ -771,6 +774,9 @@ hpa_try_alloc_one_no_grow( 
if (err) { hpdata_unreserve( ps, edata_addr_get(edata), edata_size_get(edata)); + JE_USDT(hpa_dalloc_err, 5, shard->ind, edata_addr_get(edata), + edata_size_get(edata), hpdata_nactive_get(ps), + hpdata_age_get(ps)); /* * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a @@ -1024,6 +1030,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); + JE_USDT(hpa_dalloc, 5, shard->ind, unreserve_addr, unreserve_size, + hpdata_nactive_get(ps), hpdata_age_get(ps)); hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); } diff --git a/src/hpdata.c b/src/hpdata.c index e18e03cd..f9c8f4fa 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -2,7 +2,6 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" -#include "jemalloc/internal/jemalloc_probe.h" static int hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { @@ -102,8 +101,6 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; - JE_USDT(hpa_reserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, - new_dirty, largest_unchosen_range); /* * If we allocated out of a range that was the longest in the hpdata, it @@ -164,8 +161,6 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); - JE_USDT(hpa_unreserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, - old_longest_range, new_range_len); } size_t diff --git a/src/sec.c b/src/sec.c index 36cd2dcc..c827dd5c 100644 --- a/src/sec.c +++ b/src/sec.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/sec.h" +#include 
"jemalloc/internal/jemalloc_probe.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, @@ -266,6 +267,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, deferred_work_generated); } } + JE_USDT(sec_alloc, 5, sec, shard, edata, size, frequent_reuse); return edata; } @@ -273,6 +275,7 @@ static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; + JE_USDT(sec_expand, 4, sec, edata, old_size, new_size); return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, deferred_work_generated); } @@ -281,6 +284,7 @@ static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; + JE_USDT(sec_shrink, 4, sec, edata, old_size, new_size); return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, deferred_work_generated); } @@ -351,6 +355,7 @@ sec_dalloc( return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); + JE_USDT(sec_dalloc, 3, sec, shard, edata); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); From 755735a6bf8f7b7f4e31ebc684f0fce7ac22dd78 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 17 Sep 2025 10:18:25 -0700 Subject: [PATCH 332/395] Remove Travis Windows CI for now since it has infra failures. 
--- .travis.yml | 24 ------------------------ scripts/gen_travis.py | 5 ++++- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 433288cb..643da4f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,30 +10,6 @@ dist: jammy jobs: include: - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index d43c802e..3f7aeab0 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -333,7 +333,10 @@ EXTRA_CFLAGS="-Werror -Wno-array-bounds" def main(): jobs = '\n'.join(( - generate_windows(AMD64), + # Travis is failing on Windows due to infra failures, comment it out for + # now. Should resume once it is fixed. + + # generate_windows(AMD64), # Travis currently provides only FreeBSD 12.1 which is EOL. Builds are # not working as of Jan 2024. Disable the tests for now to avoid the From de886e05d27ef3806dca802f3b9d9a0af7765046 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 29 Sep 2025 09:41:14 -0700 Subject: [PATCH 333/395] Revert "Remove an unused function and global variable" This reverts commit acd85e5359fc3ee38388e0763ceac72db7ca7150. 
--- test/unit/hpa_vectorized_madvise_large_batch.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index e1393225..c974500c 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -113,6 +113,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { From 2688047b56e6ef21d960e40281cb13774c8c17ab Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 29 Sep 2025 09:41:51 -0700 Subject: [PATCH 334/395] Revert "Do not dehugify when purging" This reverts commit 16c5abd1cd0a21e8f985f77d8e342c8ed91450d7. --- include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 9 +++++++- src/hpa_hooks.c | 12 ++++++++-- test/unit/hpa.c | 23 +++++++++++++++++++ test/unit/hpa_vectorized_madvise.c | 11 +++++++++ .../unit/hpa_vectorized_madvise_large_batch.c | 1 + 6 files changed, 54 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 5e68e349..f50ff58f 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -10,6 +10,7 @@ struct hpa_hooks_s { void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); bool (*hugify)(void *ptr, size_t size, bool sync); + void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); diff --git a/src/hpa.c b/src/hpa.c index d848b1ed..271b1af4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -433,11 +433,18 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for 
(size_t i = 0; i < batch_sz; ++i) { + hpdata_t *to_purge = batch[i].hp; + + /* Actually do the purging, now that the lock is dropped. */ + if (batch[i].dehugify) { + shard->central->hooks.dehugify( + hpdata_addr_get(to_purge), HUGEPAGE); + } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { + to_purge, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index e40d30ec..14005ae0 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,13 +8,14 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); +static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, - &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_curtime, - &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -60,6 +61,13 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { return err; } +static void +hpa_hooks_dehugify(void *ptr, size_t size) { + bool err = pages_nohuge(ptr, size); + JE_USDT(hpa_dehugify, 3, size, ptr, err); + (void)err; +} + static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { if (first_reading) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index d62ac762..1fed8a80 100644 --- a/test/unit/hpa.c +++ 
b/test/unit/hpa.c @@ -389,6 +389,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -408,6 +414,7 @@ TEST_BEGIN(test_defer_time) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -446,8 +453,10 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; ndefer_purge_calls = 0; /* @@ -468,6 +477,7 @@ TEST_BEGIN(test_defer_time) { nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); + expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); ndefer_hugify_calls = 0; @@ -514,6 +524,7 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -540,6 +551,7 @@ TEST_BEGIN(test_no_min_purge_interval) { * we have dirty pages. 
*/ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -555,6 +567,7 @@ TEST_BEGIN(test_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -580,6 +593,7 @@ TEST_BEGIN(test_min_purge_interval) { * opt.min_purge_interval_ms didn't pass yet. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); /* Minumum purge interval is set to 5 seconds in options. */ @@ -588,6 +602,7 @@ TEST_BEGIN(test_min_purge_interval) { /* Now we should purge, but nothing else. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -603,6 +618,7 @@ TEST_BEGIN(test_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -632,6 +648,7 @@ TEST_BEGIN(test_purge) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still * have 5 active hugepages (1 / 5 = 0.2 < 0.25). 
@@ -648,6 +665,7 @@ TEST_BEGIN(test_purge) { */ expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below * opt.dirty_mult. @@ -667,6 +685,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -697,6 +716,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only one purge call, because opts.experimental_max_purge_nhp * is set to 1. @@ -709,6 +729,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -717,6 +738,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* Finally, we are below the limit, no purges are expected. 
*/ expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); @@ -732,6 +754,7 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index c2aa3b58..8df54d06 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -123,6 +123,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -142,6 +148,7 @@ TEST_BEGIN(test_vectorized_failure_fallback) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge_fail; @@ -181,6 +188,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -223,6 +231,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We purge from 2 huge pages, each one 3 dirty continous segments. 
* For opt_process_madvise_max_batch = 2, that is @@ -250,6 +259,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -286,6 +296,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We have page batch size = 1. * we have 5 * HP active pages, 3 * HP dirty pages diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index c974500c..a5766620 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -140,6 +140,7 @@ TEST_BEGIN(test_vectorized_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; From ace437d26ae9c2b27d08492135da52d211c53e01 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 07:13:12 -0700 Subject: [PATCH 335/395] Running clang-format on two files --- src/jemalloc.c | 49 ++++++++++++++++++++++++------------------------- src/pages.c | 14 +++++++------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5c77621c..a3f01b3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -172,8 +172,7 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; -size_t opt_calloc_madvise_threshold = - CALLOC_MADVISE_THRESHOLD_DEFAULT; +size_t opt_calloc_madvise_threshold = 
CALLOC_MADVISE_THRESHOLD_DEFAULT; /* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; @@ -261,8 +260,8 @@ _init_init_lock(void) { # ifdef _MSC_VER # pragma section(".CRT$XCU", read) JEMALLOC_SECTION(".CRT$XCU") -JEMALLOC_ATTR(used) static const - void(WINAPI *init_init_lock)(void) = _init_init_lock; +JEMALLOC_ATTR(used) +static const void(WINAPI *init_init_lock)(void) = _init_init_lock; # endif # endif #else @@ -2883,8 +2882,8 @@ malloc_default(size_t size) { */ JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { LOG("core.malloc.entry", "size: %zu", size); void *ret = imalloc_fastpath(size, &malloc_default); @@ -2935,9 +2934,9 @@ JEMALLOC_ATTR(nonnull(1)) } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) - je_aligned_alloc(size_t alignment, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) + je_aligned_alloc(size_t alignment, size_t size) { void *ret; static_opts_t sopts; @@ -2976,9 +2975,9 @@ JEMALLOC_EXPORT } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) - je_calloc(size_t num, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) + je_calloc(size_t num, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3186,8 +3185,8 @@ je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_memalign(size_t 
alignment, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3225,8 +3224,8 @@ JEMALLOC_EXPORT #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_valloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret; static_opts_t sopts; @@ -3262,8 +3261,8 @@ JEMALLOC_EXPORT #ifdef JEMALLOC_OVERRIDE_PVALLOC JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { void *ret; static_opts_t sopts; @@ -3457,9 +3456,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t #endif JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) - je_mallocx(size_t size, int flags) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) + je_mallocx(size_t size, int flags) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3639,8 +3638,8 @@ label_oom: } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); void *ret = do_rallocx(ptr, size, flags, false); @@ -3689,8 +3688,8 @@ do_realloc_nonnull_zero(void *ptr) { } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN 
void JEMALLOC_NOTHROW * - JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (likely(ptr != NULL && size != 0)) { diff --git a/src/pages.c b/src/pages.c index 44c57b28..bc1093a3 100644 --- a/src/pages.c +++ b/src/pages.c @@ -622,9 +622,9 @@ pages_dodump(void *addr, size_t size) { # include # include -#ifndef PIDFD_SELF -#define PIDFD_SELF -10000 -#endif +# ifndef PIDFD_SELF +# define PIDFD_SELF -10000 +# endif static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); @@ -659,15 +659,15 @@ pages_purge_process_madvise_impl( * TODO: remove this save/restore of errno after supporting errno * preservation for free() call properly. */ - int saved_errno = get_errno(); + int saved_errno = get_errno(); size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, PIDFD_SELF, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); - if (purged_bytes == (size_t) -1) { + if (purged_bytes == (size_t)-1) { if (errno == EPERM || errno == EINVAL || errno == ENOSYS || errno == EBADF) { /* Process madvise not supported the way we need it. 
*/ - atomic_store_b(&process_madvise_gate, false, - ATOMIC_RELAXED); + atomic_store_b( + &process_madvise_gate, false, ATOMIC_RELAXED); } set_errno(saved_errno); } From a199278f3711bc0806e15e2f5f16004f3b287177 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 25 Aug 2025 13:23:07 -0700 Subject: [PATCH 336/395] [HPA] Add ability to start page as huge and more flexibility for purging --- include/jemalloc/internal/hpa.h | 9 + include/jemalloc/internal/hpa_opts.h | 103 ++- include/jemalloc/internal/hpdata.h | 34 +- include/jemalloc/internal/nstime.h | 4 +- include/jemalloc/internal/psset.h | 8 +- src/ctl.c | 12 +- src/hpa.c | 211 ++++-- src/hpa_hooks.c | 6 + src/hpdata.c | 17 +- src/jemalloc.c | 44 ++ src/nstime.c | 17 +- src/pages.c | 10 + src/psset.c | 40 +- src/stats.c | 3 + test/unit/hpa.c | 669 +++++++++++++++++- test/unit/hpa_vectorized_madvise.c | 8 +- .../unit/hpa_vectorized_madvise_large_batch.c | 8 +- test/unit/hpdata.c | 10 +- test/unit/mallctl.c | 3 + test/unit/psset.c | 131 +++- 20 files changed, 1231 insertions(+), 116 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 7a6ba0b9..131bbb90 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -147,6 +147,15 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; + + /* + * Last time when we attempted work (purging or hugifying). If deferral + * of the work is allowed (we have background thread), this is the time + * when background thread checked if purging or hugifying needs to be + * done. If deferral is not allowed, this is the time of (hpa_alloc or + * hpa_dalloc) activity in the shard. 
+ */ + nstime_t last_time_work_attempted; }; bool hpa_hugepage_size_exceeds_limit(void); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 9e7f76ac..6747c2db 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -7,8 +7,60 @@ /* * This file is morally part of hpa.h, but is split out for header-ordering * reasons. + * + * All of these hpa_shard_opts below are experimental. We are exploring more + * efficient packing, hugifying, and purging approaches to make efficient + * trade-offs between CPU, memory, latency, and usability. This means all of + * them are at the risk of being deprecated and corresponding configurations + * should be updated once the final version settles. */ +/* + * This enum controls how jemalloc hugifies/dehugifies pages. Each style may be + * more suitable depending on deployment environments. + * + * hpa_hugify_style_none + * Using this means that jemalloc will not be hugifying or dehugifying pages, + * but will let the kernel make those decisions. This style only makes sense + * when deploying on systems where THP are enabled in 'always' mode. With this + * style, you most likely want to have no purging at all (dirty_mult=-1) or + * purge_threshold=HUGEPAGE bytes (2097152 for 2Mb page), although other + * thresholds may work well depending on kernel settings of your deployment + * targets. + * + * hpa_hugify_style_eager + * This style results in jemalloc giving hugepage advice, if needed, to + * anonymous memory immediately after it is mapped, so huge pages can be backing + * that memory at page-fault time. This is usually more efficient than doing + * it later, and it allows us to benefit from the hugepages from the start. + * Same options for purging as for the style 'none' are good starting choices: + * no purging, or purge_threshold=HUGEPAGE, some min_purge_delay_ms that allows + * for page not to be purged quickly, etc. 
This is a good choice if you can + * afford extra memory and your application gets performance increase from + * transparent hugepages. + * + * hpa_hugify_style_lazy + * This style is suitable when you purge more aggressively (you sacrifice CPU + * performance for less memory). When this style is chosen, jemalloc will + * hugify once hugification_threshold is reached, and dehugify before purging. + * If the kernel is configured to use direct compaction you may experience some + * allocation latency when using this style. The best is to measure what works + * better for your application needs, and in the target deployment environment. + * This is a good choice for apps that cannot afford a lot of memory regression, + * but would still like to benefit from backing certain memory regions with + * hugepages. + */ +enum hpa_hugify_style_e { + hpa_hugify_style_auto = 0, + hpa_hugify_style_none = 1, + hpa_hugify_style_eager = 2, + hpa_hugify_style_lazy = 3, + hpa_hugify_style_limit = hpa_hugify_style_lazy + 1 +}; +typedef enum hpa_hugify_style_e hpa_hugify_style_t; + +extern const char *const hpa_hugify_style_names[]; + typedef struct hpa_shard_opts_s hpa_shard_opts_t; struct hpa_shard_opts_s { /* @@ -46,7 +98,8 @@ struct hpa_shard_opts_s { uint64_t hugify_delay_ms; /* - * Hugify pages synchronously. + * Hugify pages synchronously (hugify will happen even if hugify_style + * is not hpa_hugify_style_lazy). */ bool hugify_sync; @@ -59,6 +112,46 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. */ ssize_t experimental_max_purge_nhp; + + /* + * Minimum number of inactive bytes needed for a non-empty page to be + * considered purgable. + * + * When the number of touched inactive bytes on non-empty hugepage is + * >= purge_threshold, the page is purgable. Empty pages are always + * purgable. Setting this to HUGEPAGE bytes would only purge empty + * pages if using hugify_style_eager and the purges would be exactly + * HUGEPAGE bytes.
Depending on your kernel settings, this may result + * in better performance. + * + * Please note, when threshold is reached, we will purge all the dirty + * bytes, and not just up to the threshold. If this is PAGE bytes, then + * all the pages that have any dirty bytes are purgable. We treat + * purgability constraint for purge_threshold as stronger than + * dirty_mult, IOW, if no page meets purge_threshold, we will not purge + * even if we are above dirty_mult. + */ + size_t purge_threshold; + + /* + * Minimum number of ms that needs to elapse between HP page becoming + * eligible for purging and actually getting purged. + * + * Setting this to a larger number would give better chance of reusing + * that memory. Setting it to 0 means that page is eligible for purging + * as soon as it meets the purge_threshold. The clock resets when + * purgability of the page changes (page goes from being non-purgable to + * purgable). When using eager style you probably want to allow for + * some delay, to avoid purging the page too quickly and give it time to + * be used. + */ + uint64_t min_purge_delay_ms; + + /* + * Style of hugification/dehugification (see comment at + * hpa_hugify_style_t for options). + */ + hpa_hugify_style_t hugify_style; }; /* clang-format off */ @@ -84,7 +177,13 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1 \ + -1, \ + /* size_t purge_threshold */ \ + PAGE, \ + /* min_purge_delay_ms */ \ + 0, \ + /* hugify_style */ \ + hpa_hugify_style_lazy \ } /* clang-format on */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 75550f9b..eb83c900 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -124,6 +124,12 @@ struct hpdata_s { /* The touched pages (using the same definition as above). 
*/ fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + + /* Time when this extent (hpdata) becomes eligible for purging */ + nstime_t h_time_purge_allowed; + + /* True if the extent was huge and empty last time when it was purged */ + bool h_purged_when_empty_and_huge; }; TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) @@ -284,17 +290,17 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { } static inline size_t -hpdata_nactive_get(hpdata_t *hpdata) { +hpdata_nactive_get(const hpdata_t *hpdata) { return hpdata->h_nactive; } static inline size_t -hpdata_ntouched_get(hpdata_t *hpdata) { +hpdata_ntouched_get(const hpdata_t *hpdata) { return hpdata->h_ntouched; } static inline size_t -hpdata_ndirty_get(hpdata_t *hpdata) { +hpdata_ndirty_get(const hpdata_t *hpdata) { return hpdata->h_ntouched - hpdata->h_nactive; } @@ -303,6 +309,26 @@ hpdata_nretained_get(hpdata_t *hpdata) { return HUGEPAGE_PAGES - hpdata->h_ntouched; } +static inline void +hpdata_time_purge_allowed_set(hpdata_t *hpdata, const nstime_t *v) { + nstime_copy(&hpdata->h_time_purge_allowed, v); +} + +static inline const nstime_t * +hpdata_time_purge_allowed_get(const hpdata_t *hpdata) { + return &hpdata->h_time_purge_allowed; +} + +static inline bool +hpdata_purged_when_empty_and_huge_get(const hpdata_t *hpdata) { + return hpdata->h_purged_when_empty_and_huge; +} + +static inline void +hpdata_purged_when_empty_and_huge_set(hpdata_t *hpdata, bool v) { + hpdata->h_purged_when_empty_and_huge = v; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -360,7 +386,7 @@ hpdata_full(const hpdata_t *hpdata) { return hpdata->h_nactive == HUGEPAGE_PAGES; } -void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); +void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge); /* * Given an hpdata which can serve an allocation request, pick and reserve an diff --git 
a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index a10b2de1..0848b9d0 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -40,6 +40,8 @@ void nstime_isubtract(nstime_t *time, uint64_t subtrahend); void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +uint64_t nstime_ns_between(const nstime_t *earlier, const nstime_t *later); +uint64_t nstime_ms_between(const nstime_t *earlier, const nstime_t *later); uint64_t nstime_ns_since(const nstime_t *past); uint64_t nstime_ms_since(const nstime_t *past); @@ -67,7 +69,7 @@ nstime_init_zero(nstime_t *time) { } JEMALLOC_ALWAYS_INLINE bool -nstime_equals_zero(nstime_t *time) { +nstime_equals_zero(const nstime_t *time) { int diff = nstime_compare(time, &nstime_zero); assert(diff >= 0); return diff == 0; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3fdecaed..f096e414 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -121,8 +121,12 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); -/* Pick one to purge. */ -hpdata_t *psset_pick_purge(psset_t *psset); +/* + * Pick one to purge that is purgable before given time (inclusive). If now + * is NULL then time is not considered. + */ +hpdata_t *psset_pick_purge(psset_t *psset, const nstime_t *now); + /* Pick one to hugify. 
*/ hpdata_t *psset_pick_hugify(psset_t *psset); diff --git a/src/ctl.c b/src/ctl.c index a4c60ce0..85583bec 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,6 +106,9 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) +CTL_PROTO(opt_hpa_purge_threshold) +CTL_PROTO(opt_hpa_min_purge_delay_ms) +CTL_PROTO(opt_hpa_hugify_style) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -469,6 +472,9 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_purge_threshold"), CTL(opt_hpa_purge_threshold)}, + {NAME("hpa_min_purge_delay_ms"), CTL(opt_hpa_min_purge_delay_ms)}, + {NAME("hpa_hugify_style"), CTL(opt_hpa_hugify_style)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2137,7 +2143,11 @@ CTL_RO_NL_GEN( opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) - +CTL_RO_NL_GEN(opt_hpa_purge_threshold, opt_hpa_opts.purge_threshold, size_t) +CTL_RO_NL_GEN( + opt_hpa_min_purge_delay_ms, opt_hpa_opts.min_purge_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_hugify_style, + hpa_hugify_style_names[opt_hpa_opts.hugify_style], const char *) /* * This will have to change before we publicly document this option; fxp_t and * its representation are internal implementation details. 
diff --git a/src/hpa.c b/src/hpa.c index 271b1af4..27db53a9 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -26,6 +26,8 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; + bool hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; @@ -97,7 +99,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - uint64_t age, bool *oom) { + uint64_t age, bool hugify_eager, bool *oom) { /* Don't yet support big allocations; these should get filtered out. */ assert(size <= HUGEPAGE); /* @@ -120,7 +122,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, age); + hpdata_init(ps, central->eden, age, hugify_eager); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -133,22 +135,20 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, * allocate an edata_t for the new psset. */ if (central->eden == NULL) { - /* - * During development, we're primarily concerned with systems - * with overcommit. Eventually, we should be more careful here. - */ - bool commit = true; /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = pages_map( - NULL, HPA_EDEN_SIZE, HUGEPAGE, &commit); + void *new_eden = central->hooks.map(HPA_EDEN_SIZE); if (new_eden == NULL) { *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } + if (hugify_eager) { + central->hooks.hugify( + new_eden, HPA_EDEN_SIZE, /* sync */ false); + } ps = hpa_alloc_ps(tsdn, central); if (ps == NULL) { - pages_unmap(new_eden, HPA_EDEN_SIZE); + central->hooks.unmap(new_eden, HPA_EDEN_SIZE); *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; @@ -170,7 +170,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, age); + hpdata_init(ps, central->eden, age, hugify_eager); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; @@ -213,6 +213,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->npending_purge = 0; nstime_init_zero(&shard->last_purge); + nstime_init_zero(&shard->last_time_work_attempted); shard->stats.npurge_passes = 0; shard->stats.npurges = 0; @@ -274,6 +275,34 @@ hpa_shard_stats_merge( malloc_mutex_unlock(tsdn, &shard->grow_mtx); } +static bool +hpa_is_hugify_eager(hpa_shard_t *shard) { + return shard->opts.hugify_style == hpa_hugify_style_eager; +} + +static bool +hpa_is_hugify_lazy(hpa_shard_t *shard) { + /* When hugify_sync==true we also set/unset HG bit manually */ + return shard->opts.hugify_style == hpa_hugify_style_lazy + || shard->opts.hugify_sync; +} + +static bool +hpa_is_hugify_none(hpa_shard_t *shard) { + return shard->opts.hugify_style == hpa_hugify_style_none; +} + +/* + * Experimentation has shown that when we are purging only HUGEPAGE ranges and + * hugifying eagerly (or thp enabled=always) we get huge pages more often. This + * helps us have more realistic accounting. 
+ */ +static bool +hpa_should_assume_huge(hpa_shard_t *shard, const hpdata_t *ps) { + return (hpa_is_hugify_eager(shard) || hpa_is_hugify_none(shard)) + && hpdata_purged_when_empty_and_huge_get(ps); +} + static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* @@ -285,6 +314,20 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { >= shard->opts.hugification_threshold; } +static bool +hpa_good_purge_candidate(hpa_shard_t *shard, hpdata_t *ps) { + if (shard->opts.dirty_mult == (fxp_t)-1) { + /* No purging. */ + return false; + } + size_t ndirty = hpdata_ndirty_get(ps); + /* Empty pages are good candidate for purging. */ + if (ndirty > 0 && hpdata_empty(ps)) { + return true; + } + return ndirty * PAGE >= shard->opts.purge_threshold; +} + static size_t hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); @@ -316,6 +359,14 @@ hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * The page that is purgable may be delayed, but we just want to know + * if there is a need for bg thread to wake up in the future. 
+ */ + hpdata_t *ps = psset_pick_purge(&shard->psset, NULL); + if (ps == NULL) { + return false; + } if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) { return true; } @@ -325,6 +376,20 @@ hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { return false; } +static void +hpa_assume_huge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + assert(hpa_should_assume_huge(shard, ps)); + if (hpdata_huge_get(ps) || hpdata_empty(ps)) { + return; + } + + if (hpdata_ntouched_get(ps) != HUGEPAGE_PAGES) { + hpdata_hugify(ps); + } +} + static void hpa_update_purge_hugify_eligibility( tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { @@ -356,13 +421,28 @@ hpa_update_purge_hugify_eligibility( * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. */ - hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); - if (hpa_good_hugification_candidate(shard, ps) + if (hpa_should_assume_huge(shard, ps)) { + /* Assume it is huge without the need to madvise */ + hpa_assume_huge(tsdn, shard, ps); + } + if (hpa_is_hugify_lazy(shard) + && hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; shard->central->hooks.curtime(&now, /* first_reading */ true); hpdata_allow_hugify(ps, now); } + bool purgable = hpa_good_purge_candidate(shard, ps); + if (purgable && !hpdata_purge_allowed_get(ps) + && (shard->opts.min_purge_delay_ms > 0)) { + nstime_t now; + uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000; + shard->central->hooks.curtime(&now, /* first_reading */ true); + nstime_iadd(&now, delayns); + hpdata_time_purge_allowed_set(ps, &now); + } + hpdata_purge_allowed_set(ps, purgable); + /* * Once a hugepage has become eligible for hugification, we don't mark * it as ineligible just because it stops meeting the criteria (this @@ -375,7 +455,7 @@ hpa_update_purge_hugify_eligibility( * empty; it definitely doesn't 
help there until the hugepage gets * reused, which is likely not for a while. */ - if (hpdata_nactive_get(ps) == 0) { + if (hpdata_nactive_get(ps) == 0 && !hpa_should_assume_huge(shard, ps)) { hpdata_disallow_hugify(ps); } } @@ -394,8 +474,7 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { * This value protects two things: * 1. Stack size * 2. Number of huge pages that are being purged in a batch as - * we do not allow allocations while making *madvise - * syscall. + * we do not allow allocations while making madvise syscall. */ #define HPA_PURGE_BATCH_MAX_DEFAULT 16 @@ -433,18 +512,16 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for (size_t i = 0; i < batch_sz; ++i) { - hpdata_t *to_purge = batch[i].hp; - /* Actually do the purging, now that the lock is dropped. */ if (batch[i].dehugify) { shard->central->hooks.dehugify( - hpdata_addr_get(to_purge), HUGEPAGE); + hpdata_addr_get(batch[i].hp), HUGEPAGE); } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( @@ -454,14 +531,23 @@ hpa_purge_actual_unlocked( hpa_range_accum_finish(&accum, shard); } -/* Prepare purge of one page. Return num of dirty regular pages on it +static inline bool +hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { + return hpa_is_hugify_lazy(shard) && hpdata_huge_get(ps) + && !hpdata_empty(ps); +} + +/* Prepare purge of one page. 
Return number of dirty regular pages on it * Return 0 if no purgable huge page is found * * If there was a page to purge its purge state is initialized */ static inline size_t -hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { - hpdata_t *to_purge = psset_pick_purge(psset); +hpa_purge_start_hp(hpa_purge_batch_t *b, hpa_shard_t *shard) { + psset_t *psset = &shard->psset; + hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0) + ? psset_pick_purge(psset, &shard->last_time_work_attempted) + : psset_pick_purge(psset, NULL); if (to_purge == NULL) { return 0; } @@ -493,7 +579,9 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { b->item_cnt++; hp_item->hp = to_purge; /* Gather all the metadata we'll need during the purge. */ - hp_item->dehugify = hpdata_huge_get(hp_item->hp); + hp_item->dehugify = hpa_needs_dehugify(shard, hp_item->hp); + hpdata_purged_when_empty_and_huge_set(hp_item->hp, + hpdata_huge_get(hp_item->hp) && hpdata_empty(hp_item->hp)); size_t nranges; size_t ndirty = hpdata_purge_begin( hp_item->hp, &hp_item->state, &nranges); @@ -513,7 +601,11 @@ hpa_purge_finish_hp( } /* The hpdata updates. */ psset_update_begin(&shard->psset, hp_item->hp); - if (hp_item->dehugify) { + if (hpdata_huge_get(hp_item->hp)) { + /* + * Even when dehugify is not explicitly called, the page is + * assumed to be non-huge after purge. 
+ */ hpdata_dehugify(hp_item->hp); } hpdata_purge_end(hp_item->hp, &hp_item->state); @@ -569,8 +661,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { assert(hpa_batch_empty(&batch)); while ( !hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { - size_t ndirty = hpa_purge_start_hp( - &batch, &shard->psset); + size_t ndirty = hpa_purge_start_hp(&batch, shard); if (ndirty == 0) { break; } @@ -633,25 +724,33 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_disallow_hugify(to_hugify); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); - - malloc_mutex_unlock(tsdn, &shard->mtx); - - bool err = shard->central->hooks.hugify( - hpdata_addr_get(to_hugify), HUGEPAGE, shard->opts.hugify_sync); - - malloc_mutex_lock(tsdn, &shard->mtx); - shard->stats.nhugifies++; - if (err) { - /* - * When asynchronous hugification is used - * (shard->opts.hugify_sync option is false), we are not - * expecting to get here, unless something went terrible wrong. - * Because underlying syscall is only setting kernel flag for - * memory range (actual hugification happens asynchronously - * and we are not getting any feedback about its outcome), we - * expect syscall to be successful all the time. - */ - shard->stats.nhugify_failures++; + /* + * Without lazy hugification, user relies on eagerly setting HG bit, or + * leaving everything up to the kernel (ex: thp enabled=always). We + * will still pretend that call succeeds to keep our accounting close to + * what user believes is the truth on the target system, but we won't + * update nhugifies stat as system call is not being made. 
+ */ + if (hpa_is_hugify_lazy(shard)) { + malloc_mutex_unlock(tsdn, &shard->mtx); + bool err = shard->central->hooks.hugify( + hpdata_addr_get(to_hugify), HUGEPAGE, + shard->opts.hugify_sync); + malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.nhugifies++; + if (err) { + /* + * When asynchronous hugification is used + * (shard->opts.hugify_sync option is false), we are not + * expecting to get here, unless something went terrible + * wrong. Because underlying syscall is only setting + * kernel flag for memory range (actual hugification + * happens asynchronously and we are not getting any + * feedback about its outcome), we expect syscall to be + * successful all the time. + */ + shard->stats.nhugify_failures++; + } } psset_update_begin(&shard->psset, to_hugify); @@ -666,11 +765,18 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); + uint64_t since_last_purge_ms = nstime_ms_between( + &shard->last_purge, &shard->last_time_work_attempted); return since_last_purge_ms >= shard->opts.min_purge_interval_ms; } +static inline void +hpa_update_time_work_attempted(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->central->hooks.curtime(&shard->last_time_work_attempted, + /* first_reading */ false); +} + /* * Execution of deferred work is forced if it's triggered by an explicit * hpa_shard_do_deferred_work() call. 
@@ -682,6 +788,7 @@ hpa_shard_maybe_do_deferred_work( if (!forced && shard->opts.deferral_allowed) { return; } + hpa_update_time_work_attempted(tsdn, shard); /* * If we're on a background thread, do work so long as there's work to @@ -753,8 +860,8 @@ hpa_try_alloc_one_no_grow( * If the pageslab used to be empty, treat it as though it's * brand new for fragmentation-avoidance purposes; what we're * trying to approximate is the age of the allocations *in* that - * pageslab, and the allocations in the new pageslab are - * definitionally the youngest in this hpa shard. + * pageslab, and the allocations in the new pageslab are by + * definition the youngest in this hpa shard. */ hpdata_age_set(ps, shard->age_counter++); } @@ -861,8 +968,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_central_extract( - tsdn, shard->central, size, shard->age_counter++, &oom); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, + shard->age_counter++, hpa_is_hugify_eager(shard), &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 14005ae0..2ec7029d 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -19,7 +19,13 @@ const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, static void * hpa_hooks_map(size_t size) { + /* + * During development, we're primarily concerned with systems + * that overcommit. Eventually, we should be more careful here. 
+ */ + bool commit = true; + assert((size & HUGEPAGE_MASK) == 0); void *ret = pages_map(NULL, size, HUGEPAGE, &commit); JE_USDT(hpa_map, 2, size, ret); return ret; diff --git a/src/hpdata.c b/src/hpdata.c index f9c8f4fa..e17d9ecf 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,11 +17,10 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) -void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); - hpdata->h_huge = false; + hpdata->h_huge = is_huge; hpdata->h_alloc_allowed = true; hpdata->h_in_psset_alloc_container = false; hpdata->h_purge_allowed = false; @@ -34,8 +33,16 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); - hpdata->h_ntouched = 0; - fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); + if (is_huge) { + fb_set_range( + hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ntouched = HUGEPAGE_PAGES; + } else { + fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); + hpdata->h_ntouched = 0; + } + nstime_init_zero(&hpdata->h_time_purge_allowed); + hpdata->h_purged_when_empty_and_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/src/jemalloc.c b/src/jemalloc.c index a3f01b3c..72216508 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1619,6 +1619,50 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + /* + * Accept either a ratio-based or an exact purge + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, + "hpa_purge_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_purge_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.purge_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, + "hpa_min_purge_delay_ms", 0, UINT64_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + if (strncmp("hpa_hugify_style", k, klen) == 0) { + bool match = false; + for (int m = 0; m < hpa_hugify_style_limit; m++) { + if (strncmp(hpa_hugify_style_names[m], + v, vlen) + == 0) { + opt_hpa_opts.hugify_style = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/nstime.c b/src/nstime.c index ee2ddc51..0dfbeda1 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -160,6 +160,19 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } +uint64_t +nstime_ns_between(const nstime_t *earlier, const nstime_t *later) { + nstime_assert_initialized(earlier); + nstime_assert_initialized(later); + assert(nstime_compare(later, earlier) >= 0); + return later->ns - earlier->ns; +} + +uint64_t +nstime_ms_between(const nstime_t *earlier, const nstime_t *later) { + return nstime_ns_between(earlier, later) / MILLION; +} + /* Returns time since *past in nanoseconds, w/o updating *past. 
*/ uint64_t nstime_ns_since(const nstime_t *past) { @@ -168,9 +181,7 @@ nstime_ns_since(const nstime_t *past) { nstime_t now; nstime_copy(&now, past); nstime_update(&now); - - assert(nstime_compare(&now, past) >= 0); - return now.ns - past->ns; + return nstime_ns_between(past, &now); } /* Returns time since *past in milliseconds, w/o updating *past. */ diff --git a/src/pages.c b/src/pages.c index bc1093a3..000b87fe 100644 --- a/src/pages.c +++ b/src/pages.c @@ -833,9 +833,19 @@ init_thp_state(void) { } else { goto label_error; } + if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { + if (init_system_thp_mode == thp_mode_default) { + opt_hpa_opts.hugify_style = hpa_hugify_style_lazy; + } else { + opt_hpa_opts.hugify_style = hpa_hugify_style_none; + } + } return; #elif defined(JEMALLOC_HAVE_MEMCNTL) init_system_thp_mode = thp_mode_default; + if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { + opt_hpa_opts.hugify_style = hpa_hugify_style_eager; + } return; #endif label_error: diff --git a/src/psset.c b/src/psset.c index 509df064..a8a9615d 100644 --- a/src/psset.c +++ b/src/psset.c @@ -390,17 +390,37 @@ psset_pick_alloc(psset_t *psset, size_t size) { } hpdata_t * -psset_pick_purge(psset_t *psset) { - ssize_t ind_ssz = fb_fls( - psset->purge_bitmap, PSSET_NPURGE_LISTS, PSSET_NPURGE_LISTS - 1); - if (ind_ssz < 0) { - return NULL; +psset_pick_purge(psset_t *psset, const nstime_t *now) { + size_t max_bit = PSSET_NPURGE_LISTS - 1; + while (1) { + ssize_t ind_ssz = fb_fls( + psset->purge_bitmap, PSSET_NPURGE_LISTS, max_bit); + if (ind_ssz < 0) { + break; + } + pszind_t ind = (pszind_t)ind_ssz; + assert(ind < PSSET_NPURGE_LISTS); + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); + assert(ps != NULL); + if (now == NULL) { + return ps; + } + /* + * We only check the first page (it had least recent hpa_alloc + * or hpa_dalloc). It is possible that some page in the list + * would meet the time, but we only guarantee the min delay. 
If + * we want to get the one that changed the state to purgable + * the earliest, we would change the list into a heap ordered by + * time. We will use benchmark to make a decision. + */ + const nstime_t *tm_allowed = hpdata_time_purge_allowed_get(ps); + if (nstime_compare(tm_allowed, now) <= 0) { + return ps; + } + max_bit--; } - pszind_t ind = (pszind_t)ind_ssz; - assert(ind < PSSET_NPURGE_LISTS); - hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); - assert(ps != NULL); - return ps; + /* No page is ready yet */ + return NULL; } hpdata_t * diff --git a/src/stats.c b/src/stats.c index a8a574ac..ea7a4e2e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1618,6 +1618,9 @@ stats_general_print(emitter_t *emitter) { "opt.hpa_dirty_mult", emitter_type_string, &bufp); } } + OPT_WRITE_SIZE_T("hpa_purge_threshold") + OPT_WRITE_UINT64("hpa_min_purge_delay_ms") + OPT_WRITE_CHAR_P("hpa_hugify_style") OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 1fed8a80..df2c9d96 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* slab_max_alloc */ @@ -55,7 +61,37 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; + +static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms 
*/ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + HUGEPAGE - 5 * PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_eager}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { @@ -365,10 +401,11 @@ defer_test_unmap(void *ptr, size_t size) { } static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; static void defer_test_purge(void *ptr, size_t size) { (void)ptr; - (void)size; + npurge_size = size; ++ndefer_purge_calls; } @@ -783,6 +820,625 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { } TEST_END +TEST_BEGIN(test_starts_huge) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.min_purge_delay_ms = 10; + opts.min_purge_interval_ms = 0; + + defer_vectorized_purge_called = false; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = 2 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 75% */ + int pages_to_deallocate = (int)(0.75 * NALLOCS); + for (int i = 0; i < pages_to_deallocate; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], 
&deferred_work_generated); + } + + /* + * While there is enough to purge as we have one empty page and that + * one meets the threshold, we need to respect the delay, so no purging + * should happen yet. + */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "Purged too early, delay==10ms"); + + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + /* Now, enough time has passed, so we expect to purge */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Expected purge"); + + /* + * We purged one hugepage, so we expect to have one non-full page and it + * should have half of the other dirty. + */ + psset_stats_t *stat = &shard->psset.stats; + expect_zu_eq( + stat->empty_slabs[1].npageslabs, 0, "Expected zero huge slabs"); + expect_zu_eq(stat->empty_slabs[0].npageslabs, 1, "Expected 1 nh slab"); + expect_zu_eq(stat->full_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[1].npageslabs, 0, ""); + expect_zu_eq( + stat->merged.ndirty, HUGEPAGE_PAGES / 2, "One HP half dirty"); + + /* + * We now allocate one more PAGE than a half the hugepage because we + * want to make sure that one more hugepage is needed. 
+ */ + deferred_work_generated = false; + const size_t HALF = HUGEPAGE_PAGES / 2; + edatas[1] = pai_alloc(tsdn, &shard->pai, PAGE * (HALF + 1), PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[1], "Unexpected null edata"); + expect_false(deferred_work_generated, "No page is purgable"); + + expect_zu_eq(stat->empty_slabs[1].npageslabs, 0, ""); + expect_zu_eq(stat->empty_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[1].npageslabs, 0, ""); + + /* + * We expect that all inactive bytes on the second page are counted as + * dirty (this is because the page was huge and empty when we purged + * it, thus, it is assumed to come back as huge, thus all the bytes are + * counted as touched). + */ + expect_zu_eq(stat->merged.ndirty, 2 * HALF - 1, + "2nd page is huge because it was empty and huge when purged"); + expect_zu_eq(stat->merged.nactive, HALF + (HALF + 1), "1st + 2nd"); + + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + pai_dalloc(tsdn, &shard->pai, edatas[1], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + expect_zu_eq(stat->merged.ndirty, 3 * HALF, "1st + 2nd"); + + /* + * Deallocate last allocation and confirm that page is empty again, and + * once new minimum delay is reached, page should be purged. + */ + ndefer_purge_calls = 0; + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, ""); + expect_zu_eq(stat->merged.ndirty, HALF, "2nd cleared as it was empty"); + ndefer_purge_calls = 0; + + /* Deallocate all the rest, but leave only two active */ + for (int i = pages_to_deallocate; i < NALLOCS - 2; ++i) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + /* + * With prior pai_dalloc our last page becomes purgable, however we + * still want to respect the delay. 
Thus, it is not time to purge yet. + */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, "Above limit, but not time yet"); + expect_zu_eq(0, ndefer_purge_calls, ""); + + /* + * Finally, we move the time ahead, and we confirm that purge happens + * and that we have exactly two active base pages and none dirty. + */ + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, "Above limit, but not time yet"); + expect_zu_eq(1, ndefer_purge_calls, ""); + expect_zu_eq(stat->merged.ndirty, 0, "Purged all"); + expect_zu_eq(stat->merged.nactive, 2, "1st only"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_start_huge_purge_empty_only) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.min_purge_interval_ms = 0; + + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = 2 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all 
from the first and one PAGE from the second HP. */ + for (int i = 0; i < NALLOCS / 2 + 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 1, ""); + expect_zu_eq(shard->psset.stats.merged.nactive, HUGEPAGE_PAGES - 1, ""); + + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "Should not purge anything"); + + /* Allocate and free 2*PAGE so that it spills into second page again */ + edatas[0] = pai_alloc(tsdn, &shard->pai, 2 * PAGE, PAGE, false, false, + false, &deferred_work_generated); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_assume_huge_purge_fully) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.hugification_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 0; + opts.min_purge_interval_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.dirty_mult = 
FXP_INIT_PERCENT(1); + + ndefer_purge_calls = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + + /* Stats should say no active */ + expect_zu_eq(shard->psset.stats.merged.nactive, 0, ""); + expect_zu_eq( + shard->psset.stats.empty_slabs[0].npageslabs, 1, "Non huge"); + npurge_size = 0; + edatas[0] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edatas[0], "Unexpected null edata"); + expect_zu_eq(shard->psset.stats.merged.nactive, 1, ""); + expect_zu_eq(shard->psset.stats.slabs[1].npageslabs, 1, "Huge nonfull"); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio"); + + /* Now allocate all, free 10%, alloc 5%, assert non-huge */ + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + int ten_pct = NALLOCS / 10; + for (int i = 0; i < ten_pct; i++) { + pai_dalloc( + tsdn, &shard->pai, 
edatas[i], &deferred_work_generated); + } + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq( + ten_pct * PAGE, npurge_size, "Should purge 10 percent of pages"); + + for (int i = 0; i < ten_pct / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + expect_zu_eq( + shard->psset.stats.slabs[0].npageslabs, 1, "Nonhuge nonfull"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "No dirty"); + + npurge_size = 0; + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_eager_with_purge_threshold) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + const size_t THRESHOLD = 10; + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = THRESHOLD * PAGE; + opts.min_purge_delay_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.dirty_mult = FXP_INIT_PERCENT(0); + + ndefer_purge_calls = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate less then threshold PAGEs. 
*/ + for (size_t i = 0; i < THRESHOLD - 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_false(deferred_work_generated, "No page is purgable"); + expect_zu_eq(0, ndefer_purge_calls, "Should not purge yet"); + /* Deallocate one more page to meet the threshold */ + pai_dalloc( + tsdn, &shard->pai, edatas[THRESHOLD - 1], &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 0, ""); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_delay_when_not_allowed_deferral) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + const uint64_t DELAY_NS = 100 * 1000 * 1000; + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = false; + opts.purge_threshold = HUGEPAGE - 2 * PAGE; + opts.min_purge_delay_ms = DELAY_NS / (1000 * 1000); + opts.hugify_style = hpa_hugify_style_lazy; + opts.min_purge_interval_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc( + tsdn, 
&shard->pai, edatas[i], &deferred_work_generated); + } + /* curtime = 100.0s */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(0, ndefer_purge_calls, "Too early"); + + nstime_iadd(&defer_curtime, DELAY_NS - 1); + /* This activity will take the curtime=100.1 and reset purgability */ + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Dealloc all but 2 pages, purgable delay_ns later*/ + for (int i = 0; i < NALLOCS - 2; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + nstime_iadd(&defer_curtime, DELAY_NS); + pai_dalloc( + tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated); + expect_true(ndefer_purge_calls > 0, "Should have purged"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_deferred_until_time) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 1000; + opts.hugification_threshold = HUGEPAGE / 2; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 500; + opts.hugify_delay_ms = 3000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + /* Allocate one huge page */ + tsdn_t 
*tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 25% */ + for (int i = 0; i < NALLOCS / 4; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "We should hugify and purge"); + + /* Current time = 300ms, purge_eligible at 300ms + 1000ms */ + nstime_init(&defer_curtime, 300UL * 1000 * 1000); + for (int i = NALLOCS / 4; i < NALLOCS; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "Purge work generated"); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "not time for purging yet"); + + /* Current time = 900ms, purge_eligible at 1300ms */ + nstime_init(&defer_curtime, 900UL * 1000 * 1000); + uint64_t until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN, + "First pass did not happen"); + + /* Fake that first pass happened more than min_purge_interval_ago */ + nstime_init(&shard->last_purge, 350UL * 1000 * 1000); + shard->stats.npurge_passes = 1; + until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN, + "No need to check anything, it is more than interval"); + + nstime_init(&shard->last_purge, 900UL * 1000 * 1000); + nstime_init(&defer_curtime, 1000UL * 1000 * 1000); + /* Next purge expected at 900ms + min_purge_interval = 1400ms */ + uint64_t expected_ms = 1400 - 1000; + until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(expected_ms, until_ns / (1000 * 1000), "Next in 400ms"); + destroy_test_data(shard); +} +TEST_END + 
+TEST_BEGIN(test_eager_no_hugify_on_threshold) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.9; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_eager; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + /* First allocation makes the page huge */ + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager"); + expect_zu_eq(shard->psset.stats.full_slabs[1].npageslabs, 1, + "Page should be full-huge"); + + /* Deallocate 25% */ + for (int i = 0; i < NALLOCS / 4; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "purge is needed"); + ndefer_purge_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager"); + expect_zu_eq(ndefer_purge_calls, 1, 
"Purge should have happened"); + + /* Allocate 20% again, so that we are above hugification threshold */ + ndefer_purge_calls = 0; + nstime_iadd(&defer_curtime, 800UL * 1000 * 1000); + for (int i = 0; i < NALLOCS / 4 - 1; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "no purging needed"); + expect_zu_eq(ndefer_hugify_calls, 0, "no hugify - eager"); + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.25; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + /* First allocation makes the page huge */ + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpdata_t *ps 
= psset_pick_alloc(&shard->psset, PAGE); + expect_false(hpdata_huge_get(ps), "Page should be non-huge"); + + ndefer_hugify_calls = 0; + ndefer_purge_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall"); + ps = psset_pick_alloc(&shard->psset, PAGE); + expect_true(ps, "Page should be huge"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -801,5 +1457,10 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, test_no_min_purge_interval, test_min_purge_interval, test_purge, - test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero); + test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero, + test_starts_huge, test_start_huge_purge_empty_only, + test_assume_huge_purge_fully, test_eager_with_purge_threshold, + test_delay_when_not_allowed_deferral, test_deferred_until_time, + test_eager_no_hugify_on_threshold, + test_hpa_hugify_style_none_huge_no_syscall); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 8df54d06..c66811e1 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index a5766620..8e7be7c0 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* 
min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2329f065..ac45d697 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -5,7 +5,7 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { @@ -57,7 +57,7 @@ TEST_END TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -101,7 +101,7 @@ TEST_END */ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocate the first 3/4 of the pages. */ void *alloc = hpdata_reserve_alloc( @@ -164,7 +164,7 @@ TEST_BEGIN(test_purge_over_retained) { size_t purge_size; hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocate the first 3/4 of the pages. 
*/ void *alloc = hpdata_reserve_alloc( @@ -238,7 +238,7 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ac7506cf..d1974e0f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -313,6 +313,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); + TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); + TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always); + TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/psset.c b/test/unit/psset.c index 73a9835a..3ce8e976 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -124,7 +124,8 @@ TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t alloc; edata_init_test(&alloc); @@ -141,9 +142,10 @@ TEST_END TEST_BEGIN(test_fill) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; + bool is_huge = false; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -179,7 +181,8 @@ TEST_BEGIN(test_reuse) { hpdata_t *ps; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + 
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -274,7 +277,8 @@ TEST_BEGIN(test_evict) { hpdata_t *ps; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -311,9 +315,10 @@ TEST_BEGIN(test_multi_pageslab) { hpdata_t *ps; hpdata_t pageslab[2]; - hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), - PAGESLAB_AGE + 1); + PAGESLAB_AGE + 1, is_huge); edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -376,7 +381,8 @@ TEST_END TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -442,7 +448,8 @@ TEST_BEGIN(test_stats_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -570,7 +577,8 @@ TEST_BEGIN(test_stats_fullness) { bool err; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -620,13 +628,15 @@ static void init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; + bool is_huge = false; - 
hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, is_huge); /* * This pageslab would be better from an address-first-fit POV, but * worse from an age POV. */ - hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); + hpdata_init( + worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, is_huge); psset_init(psset); @@ -763,14 +773,15 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; + bool is_huge = false; for (size_t i = 0; i < NHP; i++) { - hpdata_init( - &hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), 123 + i); + hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), + 123 + i, is_huge); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), 456 + i); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, is_huge); psset_insert(&psset, &hpdata_nonhuge[i]); } for (int i = 0; i < 2 * NHP; i++) { @@ -802,7 +813,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { * further. 
*/ for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); + hpdata = psset_pick_purge(&psset, NULL); assert_true(nonhuge_begin <= (uintptr_t)hpdata && (uintptr_t)hpdata < nonhuge_end, ""); @@ -812,7 +823,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { psset_update_end(&psset, hpdata); } for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); + hpdata = psset_pick_purge(&psset, NULL); expect_true(huge_begin <= (uintptr_t)hpdata && (uintptr_t)hpdata < huge_end, ""); @@ -825,6 +836,72 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { } TEST_END +TEST_BEGIN(test_purge_timing) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); + void *ptr; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_empty_nh; + hpdata_t hpdata_empty_huge; + hpdata_t hpdata_nonempty; + + nstime_t basetime, now, empty_nh_tm, empty_huge_tm, nonempty_tm; + const uint64_t BASE_SEC = 100; + nstime_init2(&basetime, BASE_SEC, 0); + + /* Create and add to psset */ + hpdata_init(&hpdata_empty_nh, (void *)(9 * HUGEPAGE), 102, false); + psset_insert(&psset, &hpdata_empty_nh); + hpdata_init(&hpdata_empty_huge, (void *)(10 * HUGEPAGE), 123, true); + psset_insert(&psset, &hpdata_empty_huge); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, false); + psset_insert(&psset, &hpdata_nonempty); + + psset_update_begin(&psset, &hpdata_empty_nh); + ptr = hpdata_reserve_alloc(&hpdata_empty_nh, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty_nh), ptr, ""); + hpdata_unreserve(&hpdata_empty_nh, ptr, PAGE); + hpdata_purge_allowed_set(&hpdata_empty_nh, true); + nstime_init2(&empty_nh_tm, BASE_SEC + 100, 0); + hpdata_time_purge_allowed_set(&hpdata_empty_nh, &empty_nh_tm); + psset_update_end(&psset, &hpdata_empty_nh); + + psset_update_begin(&psset, &hpdata_empty_huge); + ptr = hpdata_reserve_alloc(&hpdata_empty_huge, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty_huge), ptr, ""); + hpdata_unreserve(&hpdata_empty_huge, ptr, PAGE); + nstime_init2(&empty_huge_tm, BASE_SEC + 
110, 0); + hpdata_time_purge_allowed_set(&hpdata_empty_huge, &empty_huge_tm); + hpdata_purge_allowed_set(&hpdata_empty_huge, true); + psset_update_end(&psset, &hpdata_empty_huge); + + psset_update_begin(&psset, &hpdata_nonempty); + ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, ""); + hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE); + hpdata_purge_allowed_set(&hpdata_nonempty, true); + nstime_init2(&nonempty_tm, BASE_SEC + 80, 0); + hpdata_time_purge_allowed_set(&hpdata_nonempty, &nonempty_tm); + psset_update_end(&psset, &hpdata_nonempty); + + /* The best to purge with no time restriction is the huge one */ + hpdata_t *ps = psset_pick_purge(&psset, NULL); + expect_ptr_eq(&hpdata_empty_huge, ps, "Without tick, pick huge"); + + /* However, only the one eligible for purging can be picked */ + nstime_init2(&now, BASE_SEC + 90, 0); + ps = psset_pick_purge(&psset, &now); + expect_ptr_eq(&hpdata_nonempty, ps, "Only non empty purgable"); + + /* When all eligible, huge empty is the best */ + nstime_init2(&now, BASE_SEC + 110, 0); + ps = psset_pick_purge(&psset, &now); + expect_ptr_eq(&hpdata_empty_huge, ps, "Huge empty is the best"); +} +TEST_END + TEST_BEGIN(test_purge_prefers_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; @@ -834,9 +911,10 @@ TEST_BEGIN(test_purge_prefers_empty) { hpdata_t hpdata_empty; hpdata_t hpdata_nonempty; - hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123); + bool is_huge = false; + hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123, is_huge); psset_insert(&psset, &hpdata_empty); - hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, is_huge); psset_insert(&psset, &hpdata_nonempty); psset_update_begin(&psset, &hpdata_empty); @@ -857,7 +935,7 @@ TEST_BEGIN(test_purge_prefers_empty) { * The nonempty slab has 9 dirty pages, while the empty one has only 1. 
* We should still pick the empty one for purging. */ - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_empty, to_purge, ""); } TEST_END @@ -876,13 +954,16 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { uintptr_t cur_addr = 100 * HUGEPAGE; uint64_t cur_age = 123; + bool is_huge = false; for (int i = 0; i < NHP; i++) { - hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); + hpdata_init( + &hpdata_huge[i], (void *)cur_addr, cur_age, is_huge); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_huge[i]); - hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age); + hpdata_init( + &hpdata_nonhuge[i], (void *)cur_addr, cur_age, is_huge); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_nonhuge[i]); @@ -917,14 +998,14 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { * any of the non-huge ones for purging. */ for (int i = 0; i < NHP; i++) { - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_huge[i], to_purge, ""); psset_update_begin(&psset, to_purge); hpdata_purge_allowed_set(to_purge, false); psset_update_end(&psset, to_purge); } for (int i = 0; i < NHP; i++) { - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_nonhuge[i], to_purge, ""); psset_update_begin(&psset, to_purge); hpdata_purge_allowed_set(to_purge, false); @@ -938,6 +1019,6 @@ main(void) { return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict, test_multi_pageslab, test_stats_merged, test_stats_huge, test_stats_fullness, test_oldest_fit, test_insert_remove, - test_purge_prefers_nonhuge, test_purge_prefers_empty, - test_purge_prefers_empty_huge); + test_purge_prefers_nonhuge, test_purge_timing, + test_purge_prefers_empty, test_purge_prefers_empty_huge); } From 707aab0c955e97abed6bd0780eb47cd38e7b1843 Mon Sep 17 00:00:00 2001 From: Slobodan 
Predolac Date: Tue, 16 Sep 2025 13:25:42 -0700 Subject: [PATCH 337/395] [pa-bench] Add clock to pa benchmark --- test/stress/pa/pa_data_preprocessor.cpp | 20 ++++++----- test/stress/pa/pa_microbench.c | 45 ++++++++++++++++++++----- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/test/stress/pa/pa_data_preprocessor.cpp b/test/stress/pa/pa_data_preprocessor.cpp index 757f37bb..44e84e8c 100644 --- a/test/stress/pa/pa_data_preprocessor.cpp +++ b/test/stress/pa/pa_data_preprocessor.cpp @@ -16,13 +16,14 @@ * HPA: shard_ind_int,addr_int,nsecs_int,probe,size_int * SEC: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int * - * Output format (4 columns): - * shard_ind_int,operation_index,size_or_alloc_index,is_frequent + * Output format (5 columns): + * shard_ind_int,operation_index,size_or_alloc_index,nsecs,is_frequent * where: * - shard_ind_int: shard index as integer * - operation_index: 0=alloc, 1=dalloc * - size_or_alloc_index: for alloc operations show bytes, * for dalloc operations show index of corresponding alloc + * - nsecs: nanoseconds of some monotonic clock * - is_frequent: 1 if frequent reuse allocation, 0 otherwise */ @@ -250,14 +251,14 @@ parse_sec_line( void write_output_header(std::ofstream &output) { - output << "shard_ind,operation,size_or_alloc_index,is_frequent\n"; + output << "shard_ind,operation,size_or_alloc_index,nsecs,is_frequent\n"; } void write_output_event(std::ofstream &output, int shard_ind, int operation, - size_t value, bool is_frequent) { - output << shard_ind << "," << operation << "," << value << "," - << (is_frequent ? 1 : 0) << "\n"; + size_t value, uint64_t nsecs, bool is_frequent) { + output << shard_ind << "," << operation << "," << value << "," << nsecs + << "," << (is_frequent ? 
1 : 0) << "\n"; } size_t @@ -319,7 +320,7 @@ process_trace_file(const std::string &input_filename, if (is_alloc_operation(event.probe)) { /* This is an allocation */ write_output_event(output, event.shard_ind, 0, - event.size, event.is_frequent); + event.size, event.nsecs, event.is_frequent); /* Track this allocation with the current sequence number */ tracker.add_allocation(event.addr, event.size, @@ -335,7 +336,8 @@ process_trace_file(const std::string &input_filename, assert(event.nsecs >= record->nsecs); /* Found matching allocation with valid timing */ write_output_event(output, event.shard_ind, 1, - record->alloc_index, event.is_frequent); + record->alloc_index, event.nsecs, + event.is_frequent); tracker.remove_allocation(event.addr); output_count++; /* Count this deallocation */ } else { @@ -390,7 +392,7 @@ main(int argc, char *argv[]) { << " output_file - Output file for simulator with format:" << std::endl; std::cerr - << " shard_ind,operation,size_or_alloc_index,is_frequent" + << " shard_ind,operation,size_or_alloc_index,nsecs,is_frequent" << std::endl; std::cerr << std::endl; std::cerr << "Output format:" << std::endl; diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c index 4ad3652d..c4706b04 100644 --- a/test/stress/pa/pa_microbench.c +++ b/test/stress/pa/pa_microbench.c @@ -32,10 +32,11 @@ typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t; typedef struct { - int shard_ind; - pa_op_t operation; - size_t size_or_alloc_index; - int is_frequent; + int shard_ind; + pa_op_t operation; + size_t size_or_alloc_index; + uint64_t nsecs; + int is_frequent; } pa_event_t; typedef struct { @@ -73,6 +74,29 @@ static shard_infrastructure_t *g_shard_infra = NULL; /* Per-shard PA infrastructure */ static pa_central_t g_pa_central; /* Global PA central */ +/* Override for curtime */ +static hpa_hooks_t hpa_hooks_override; +static nstime_t cur_time_clock; + +void +curtime(nstime_t *r_time, bool first_reading) { + if (first_reading) { + 
nstime_init_zero(r_time); + } + *r_time = cur_time_clock; +} + +static void +set_clock(uint64_t nsecs) { + nstime_init(&cur_time_clock, nsecs); +} + +static void +init_hpa_hooks() { + hpa_hooks_override = hpa_hooks_default; + hpa_hooks_override.curtime = curtime; +} + static void cleanup_pa_infrastructure(int num_shards); static bool @@ -125,8 +149,9 @@ initialize_pa_infrastructure(int num_shards) { } /* Initialize PA central with HPA enabled */ + init_hpa_hooks(); if (pa_central_init(&g_pa_central, central_base, true /* hpa */, - &hpa_hooks_default)) { + &hpa_hooks_override)) { printf("DEBUG: Failed to initialize PA central\n"); base_delete(tsd_tsdn(tsd_fetch()), central_base); free(g_shard_stats); @@ -237,14 +262,15 @@ static bool parse_csv_line(const char *line, pa_event_t *event) { /* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */ int operation; - int fields = sscanf(line, "%d,%d,%zu,%d", &event->shard_ind, &operation, - &event->size_or_alloc_index, &event->is_frequent); + int fields = sscanf(line, "%d,%d,%zu,%lu,%d", &event->shard_ind, + &operation, &event->size_or_alloc_index, &event->nsecs, + &event->is_frequent); - if (fields < 3) { /* is_frequent is optional */ + if (fields < 4) { /* is_frequent is optional */ return false; } - if (fields == 3) { + if (fields == 4) { event->is_frequent = 0; /* Default value */ } @@ -393,6 +419,7 @@ simulate_trace( continue; } + set_clock(event->nsecs); switch (event->operation) { case PA_ALLOC: { size_t size = event->size_or_alloc_index; From 7c40be249cc204b2698d7f97ec5ac1de5551a3cc Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 16 Sep 2025 16:50:11 -0700 Subject: [PATCH 338/395] Add npurges and npurge_passes to output of pa_benchmark --- test/stress/pa/pa_microbench.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c index c4706b04..3e7d8aef 100644 --- a/test/stress/pa/pa_microbench.c +++ 
b/test/stress/pa/pa_microbench.c @@ -382,11 +382,17 @@ print_shard_stats(int shard_id, size_t operation_count) { } /* Dirty bytes */ - size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + uint64_t npurge_passes = hpa_stats.nonderived_stats.npurge_passes; + uint64_t npurges = hpa_stats.nonderived_stats.npurges; + assert(g_use_sec + || psset_stats->merged.nactive * PAGE + == g_shard_stats[shard_id].bytes_allocated); /* Output enhanced stats with detailed breakdown */ fprintf(g_stats_output, - "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu\n", + "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu" + ",%lu,%lu\n", operation_count, shard_id, g_shard_stats[shard_id].alloc_count, g_shard_stats[shard_id].dealloc_count, g_shard_stats[shard_id].bytes_allocated, total_pageslabs, @@ -395,7 +401,7 @@ print_shard_stats(int shard_id, size_t operation_count) { empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes, hpa_stats.nonderived_stats.nhugifies, hpa_stats.nonderived_stats.nhugify_failures, - hpa_stats.nonderived_stats.ndehugifies); + hpa_stats.nonderived_stats.ndehugifies, npurge_passes, npurges); fflush(g_stats_output); } @@ -629,7 +635,8 @@ main(int argc, char *argv[]) { "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs," "full_pageslabs_non_huge,full_pageslabs_huge," "empty_pageslabs_non_huge,empty_pageslabs_huge," - "dirty_bytes,nhugifies,nhugify_failures,ndehugifies\n"); + "dirty_bytes,nhugifies,nhugify_failures,ndehugifies," + "npurge_passes,npurges\n"); } /* Load trace data and determine max number of arenas */ From 5e49c28ef042d7c1f446ec6615d6d84bafabb3fd Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 6 Oct 2025 12:01:13 -0700 Subject: [PATCH 339/395] [EASY] Spelling in the comments --- include/jemalloc/internal/edata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/edata.h 
b/include/jemalloc/internal/edata.h index 2b229e7d..06b6c545 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,7 @@ /* * Defines how many nodes visited when enumerating the heap to search for - * qualifed extents. More nodes visited may result in better choices at + * qualified extents. More nodes visited may result in better choices at * the cost of longer search time. This size should not exceed 2^16 - 1 * because we use uint16_t for accessing the queue needed for enumeration. */ @@ -230,7 +230,7 @@ struct edata_s { /* * If this edata is a user allocation from an HPA, it comes out of some - * pageslab (we don't yet support huegpage allocations that don't fit + * pageslab (we don't yet support hugepage allocations that don't fit * into pageslabs). This tracks it. */ hpdata_t *e_ps; From f714cd9249eb1df010b035623ebca89b7614b1cc Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 6 Oct 2025 15:45:38 -0700 Subject: [PATCH 340/395] Inline the value of an always false boolean local variable Next to its use, which is always as an argument, we include the name of the parameter in a constant. This completes a partially implemented cleanup suggested in an earlier commit. 
--- test/unit/psset.c | 64 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index 3ce8e976..12d55941 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -124,8 +124,8 @@ TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t alloc; edata_init_test(&alloc); @@ -142,10 +142,10 @@ TEST_END TEST_BEGIN(test_fill) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; - bool is_huge = false; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -181,8 +181,8 @@ TEST_BEGIN(test_reuse) { hpdata_t *ps; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -277,8 +277,8 @@ TEST_BEGIN(test_evict) { hpdata_t *ps; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -315,10 +315,10 @@ TEST_BEGIN(test_multi_pageslab) { hpdata_t *ps; hpdata_t pageslab[2]; - bool is_huge = false; - hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), - PAGESLAB_AGE + 1, is_huge); + PAGESLAB_AGE + 1, /* is_huge */ false); 
edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -381,8 +381,8 @@ TEST_END TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -448,8 +448,8 @@ TEST_BEGIN(test_stats_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -577,8 +577,8 @@ TEST_BEGIN(test_stats_fullness) { bool err; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -628,15 +628,15 @@ static void init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; - bool is_huge = false; - hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, is_huge); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, + /* is_huge */ false); /* * This pageslab would be better from an address-first-fit POV, but * worse from an age POV. 
*/ - hpdata_init( - worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, is_huge); + hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, + /* is_huge */ false); psset_init(psset); @@ -773,15 +773,15 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; - bool is_huge = false; for (size_t i = 0; i < NHP; i++) { hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), - 123 + i, is_huge); + 123 + i, /* is_huge */ false); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, is_huge); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, + /* is_huge */ false); psset_insert(&psset, &hpdata_nonhuge[i]); } for (int i = 0; i < 2 * NHP; i++) { @@ -911,10 +911,11 @@ TEST_BEGIN(test_purge_prefers_empty) { hpdata_t hpdata_empty; hpdata_t hpdata_nonempty; - bool is_huge = false; - hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123, is_huge); + hpdata_init( + &hpdata_empty, (void *)(10 * HUGEPAGE), 123, /* is_huge */ false); psset_insert(&psset, &hpdata_empty); - hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, is_huge); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, + /* is_huge */ false); psset_insert(&psset, &hpdata_nonempty); psset_update_begin(&psset, &hpdata_empty); @@ -954,16 +955,15 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { uintptr_t cur_addr = 100 * HUGEPAGE; uint64_t cur_age = 123; - bool is_huge = false; for (int i = 0; i < NHP; i++) { - hpdata_init( - &hpdata_huge[i], (void *)cur_addr, cur_age, is_huge); + hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age, + /* is_huge */ false); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_huge[i]); - hpdata_init( - &hpdata_nonhuge[i], (void *)cur_addr, cur_age, is_huge); + hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age, + /* is_huge */ false); 
cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_nonhuge[i]); From 87555dfbb22efb0c4bcfc59be0b7ccad19725edf Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 7 Oct 2025 16:42:15 -0700 Subject: [PATCH 341/395] Do not release the hpa_shard->mtx when inserting newly retrieved page from central before allocating from it --- src/hpa.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 27db53a9..a7875e89 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -840,6 +840,8 @@ hpa_shard_maybe_do_deferred_work( static edata_t * hpa_try_alloc_one_no_grow( tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + bool err; edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { @@ -912,10 +914,10 @@ hpa_try_alloc_one_no_grow( } static size_t -hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom, size_t nallocs, edata_list_active_t *results, +hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard, + size_t size, bool *oom, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { - malloc_mutex_lock(tsdn, &shard->mtx); + malloc_mutex_assert_owner(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { edata_t *edata = hpa_try_alloc_one_no_grow( @@ -928,6 +930,16 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); + return nsuccess; +} + +static size_t +hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom, size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { + malloc_mutex_lock(tsdn, &shard->mtx); + size_t nsuccess = hpa_try_alloc_batch_no_grow_locked( + tsdn, shard, size, oom, nallocs, results, 
deferred_work_generated); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } @@ -976,17 +988,16 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } /* - * We got the pageslab; allocate from it. This does an unlock followed - * by a lock on the same mutex, and holds the grow mutex while doing - * deferred work, but this is an uncommon path; the simplicity is worth - * it. + * We got the pageslab; allocate from it. This holds the grow mutex + * while doing deferred work, but this is an uncommon path; the + * simplicity is worth it. */ malloc_mutex_lock(tsdn, &shard->mtx); psset_insert(&shard->psset, ps); + nsuccess += hpa_try_alloc_batch_no_grow_locked(tsdn, shard, size, &oom, + nallocs - nsuccess, results, deferred_work_generated); malloc_mutex_unlock(tsdn, &shard->mtx); - nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results, deferred_work_generated); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. From 2cfa41913e71b0ff24788812f61d5485f04b647d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 7 Oct 2025 16:18:07 -0700 Subject: [PATCH 342/395] Refactor init_system_thp_mode and print it in malloc stats. --- include/jemalloc/internal/pages.h | 18 +++++++++++--- src/arena.c | 4 +-- src/base.c | 4 +-- src/pages.c | 41 ++++++++++++++++++++----------- src/stats.c | 16 ++++++++++++ test/unit/pages.c | 2 +- 6 files changed, 63 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 31909934..a4282c9b 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -97,8 +97,12 @@ static const bool pages_can_hugify = #endif ; +/* + * thp_mode_t are values for opt.thp, while system_thp_mode_t is for kernel thp + * settings, i.e., init_system_thp_mode. + */ typedef enum { - thp_mode_default = 0, /* Do not change hugepage settings. 
*/ + thp_mode_do_nothing = 0, /* Respect kernel thp settings. */ thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ @@ -106,10 +110,18 @@ typedef enum { thp_mode_not_supported = 3 /* No THP support detected. */ } thp_mode_t; -#define THP_MODE_DEFAULT thp_mode_default +typedef enum { + system_thp_mode_madvise = 0, /* Kernel THP mode: madvise */ + system_thp_mode_always = 1, /* Kernel THP mode: always */ + system_thp_mode_never = 2, /* Kernel THP mode: never */ + system_thp_mode_not_supported = 3 /* No THP support detected. */ +} system_thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_do_nothing extern thp_mode_t opt_thp; -extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern system_thp_mode_t init_system_thp_mode; /* Initial system wide state. */ extern const char *const thp_mode_names[]; +extern const char *const system_thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); diff --git a/src/arena.c b/src/arena.c index 962a325d..224a9b63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1890,8 +1890,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { /* Make sure that b0 thp auto-switch won't happen concurrently here. 
*/ malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp - && metadata_thp_enabled() && (opt_thp == thp_mode_default) - && (init_system_thp_mode == thp_mode_default); + && metadata_thp_enabled() && (opt_thp == thp_mode_do_nothing) + && (init_system_thp_mode == system_thp_mode_madvise); (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", diff --git a/src/base.c b/src/base.c index c494556c..ef7f0dd4 100644 --- a/src/base.c +++ b/src/base.c @@ -28,8 +28,8 @@ const char *const metadata_thp_mode_names[] = {"disabled", "auto", "always"}; static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() - && (init_system_thp_mode == thp_mode_default)); + return (metadata_thp_enabled() && + (init_system_thp_mode == system_thp_mode_madvise)); } static void * diff --git a/src/pages.c b/src/pages.c index 000b87fe..e7766fcc 100644 --- a/src/pages.c +++ b/src/pages.c @@ -44,8 +44,10 @@ static bool os_overcommits; const char *const thp_mode_names[] = { "default", "always", "never", "not supported"}; -thp_mode_t opt_thp = THP_MODE_DEFAULT; -thp_mode_t init_system_thp_mode; +const char *const system_thp_mode_names[] = { + "madvise", "always", "never", "not supported"}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +system_thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. 
*/ static bool pages_can_purge_lazy_runtime = true; @@ -778,21 +780,31 @@ os_overcommits_proc(void) { } #endif +static bool +pages_should_skip_set_thp_state() { + if (opt_thp == thp_mode_do_nothing + || (opt_thp == thp_mode_always + && init_system_thp_mode == system_thp_mode_always) + || (opt_thp == thp_mode_never + && init_system_thp_mode == system_thp_mode_never)) { + return true; + } + return false; +} void pages_set_thp_state(void *ptr, size_t size) { - if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + if (pages_should_skip_set_thp_state()) { return; } assert(opt_thp != thp_mode_not_supported - && init_system_thp_mode != thp_mode_not_supported); + && init_system_thp_mode != system_thp_mode_not_supported); if (opt_thp == thp_mode_always - && init_system_thp_mode != thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default); + && init_system_thp_mode == system_thp_mode_madvise) { pages_huge_unaligned(ptr, size); } else if (opt_thp == thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default - || init_system_thp_mode == thp_mode_always); + assert(init_system_thp_mode == system_thp_mode_madvise + || init_system_thp_mode == system_thp_mode_always); pages_nohuge_unaligned(ptr, size); } } @@ -825,16 +837,16 @@ init_thp_state(void) { } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_default; + init_system_thp_mode = system_thp_mode_madvise; } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_always; + init_system_thp_mode = system_thp_mode_always; } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_never; + init_system_thp_mode = system_thp_mode_never; } else { goto label_error; } if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { - if (init_system_thp_mode == thp_mode_default) { + if (init_system_thp_mode == system_thp_mode_madvise) { opt_hpa_opts.hugify_style = hpa_hugify_style_lazy; 
} else { opt_hpa_opts.hugify_style = hpa_hugify_style_none; @@ -842,14 +854,15 @@ init_thp_state(void) { } return; #elif defined(JEMALLOC_HAVE_MEMCNTL) - init_system_thp_mode = thp_mode_default; + init_system_thp_mode = system_thp_mode_madvise; if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { opt_hpa_opts.hugify_style = hpa_hugify_style_eager; } return; #endif label_error: - opt_thp = init_system_thp_mode = thp_mode_not_supported; + opt_thp = thp_mode_not_supported; + init_system_thp_mode = system_thp_mode_not_supported; } bool diff --git a/src/stats.c b/src/stats.c index ea7a4e2e..366f96f7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1516,6 +1516,22 @@ stats_general_print(emitter_t *emitter) { #undef CONFIG_WRITE_BOOL emitter_dict_end(emitter); /* Close "config" dict. */ + /* system. */ + emitter_dict_begin(emitter, "system", "System configuration"); + + /* + * This shows system's THP mode detected at jemalloc's init time. + * jemalloc does not re-detect the mode even if it changes after + * jemalloc's init. It is assumed that system's THP mode is stable + * during the process's lifetime and a violation could lead to + * undefined behavior. + */ + const char *thp_mode_name = system_thp_mode_names[init_system_thp_mode]; + emitter_kv(emitter, "thp_mode", "system.thp_mode", emitter_type_string, + &thp_mode_name); + + emitter_dict_end(emitter); /* Close "system". */ + /* opt. */ #define OPT_WRITE(name, var, size, emitter_type) \ if (je_mallctl("opt." 
name, (void *)&var, &size, NULL, 0) == 0) { \ diff --git a/test/unit/pages.c b/test/unit/pages.c index dbee2f0c..66afb84b 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_pages_huge) { pages = pages_map(NULL, alloc_size, PAGE, &commit); expect_ptr_not_null(pages, "Unexpected pages_map() error"); - if (init_system_thp_mode == thp_mode_default) { + if (init_system_thp_mode == system_thp_mode_madvise) { hugepage = (void *)(ALIGNMENT_CEILING( (uintptr_t)pages, HUGEPAGE)); expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, From 3678a57c101b84400d6db85c96ad8ce18d5fcdf9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 13 Oct 2025 11:37:49 -0700 Subject: [PATCH 343/395] When extracting from central, hugify_eager is different than start_as_huge --- Makefile.in | 1 + .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 5 + src/hpa.c | 9 +- src/jemalloc.c | 6 +- src/stats.c | 1 + test/unit/hpa.c | 8 +- test/unit/hpa.sh | 2 +- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_thp_always.c | 202 ++++++++++++++++++ test/unit/hpa_thp_always.sh | 3 + test/unit/hpa_vectorized_madvise.sh | 2 +- test/unit/mallctl.c | 2 + 13 files changed, 235 insertions(+), 9 deletions(-) create mode 100644 test/unit/hpa_thp_always.c create mode 100644 test/unit/hpa_thp_always.sh diff --git a/Makefile.in b/Makefile.in index 047e05cb..c63e6f8f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -228,6 +228,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_thp_always.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ $(srcroot)test/unit/hpa_background_thread.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b502c7e7..a319dc81 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ 
b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -15,6 +15,7 @@ extern bool malloc_slow; extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_trust_madvise; +extern bool opt_experimental_hpa_start_huge_if_thp_always; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/src/ctl.c b/src/ctl.c index 85583bec..d3443a13 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -98,6 +98,7 @@ CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) +CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -464,6 +465,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, + {NAME("experimental_hpa_start_huge_if_thp_always"), + CTL(opt_experimental_hpa_start_huge_if_thp_always)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, @@ -2131,6 +2134,8 @@ CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN( opt_debug_double_free_max_scan, opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always, + opt_experimental_hpa_start_huge_if_thp_always, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) /* HPA options. 
*/ diff --git a/src/hpa.c b/src/hpa.c index a7875e89..3687e6ea 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -28,6 +28,8 @@ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; +bool opt_experimental_hpa_start_huge_if_thp_always = true; + bool hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; @@ -113,6 +115,9 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, *oom = false; hpdata_t *ps = NULL; + bool start_as_huge = hugify_eager + || (init_system_thp_mode == system_thp_mode_always + && opt_experimental_hpa_start_huge_if_thp_always); /* Is eden a perfect fit? */ if (central->eden != NULL && central->eden_len == HUGEPAGE) { @@ -122,7 +127,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, age, hugify_eager); + hpdata_init(ps, central->eden, age, start_as_huge); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -170,7 +175,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, age, hugify_eager); + hpdata_init(ps, central->eden, age, start_as_huge); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; diff --git a/src/jemalloc.c b/src/jemalloc.c index 72216508..0f6ff0c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1302,6 +1302,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL( + opt_experimental_hpa_start_huge_if_thp_always, + "experimental_hpa_start_huge_if_thp_always") 
CONF_HANDLE_BOOL( opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { @@ -1647,7 +1650,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("hpa_hugify_style", k, klen) == 0) { bool match = false; - for (int m = 0; m < hpa_hugify_style_limit; m++) { + for (int m = 0; m < hpa_hugify_style_limit; + m++) { if (strncmp(hpa_hugify_style_names[m], v, vlen) == 0) { diff --git a/src/stats.c b/src/stats.c index 366f96f7..4e04336e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1604,6 +1604,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort_conf") OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") + OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index df2c9d96..0398e21a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -1416,7 +1416,6 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { nstime_init(&defer_curtime, 10 * 1000 * 1000); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - /* First allocation makes the page huge */ enum { NALLOCS = HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; ndefer_purge_calls = 0; @@ -1426,14 +1425,17 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE); - expect_false(hpdata_huge_get(ps), "Page should be non-huge"); + expect_false( + hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge"); ndefer_hugify_calls = 0; ndefer_purge_calls = 0; hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall"); ps = psset_pick_alloc(&shard->psset, PAGE); - expect_true(ps, "Page should be huge"); + expect_ptr_not_null(ps, "Unexpected null page"); + expect_false( + hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge"); 
destroy_test_data(shard); } diff --git a/test/unit/hpa.sh b/test/unit/hpa.sh index fe0e0b67..22451f1d 100644 --- a/test/unit/hpa.sh +++ b/test/unit/hpa.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="process_madvise_max_batch:0" +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 65a56a08..5c85d48b 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c new file mode 100644 index 00000000..29c86cdd --- /dev/null +++ b/test/unit/hpa_thp_always.c @@ -0,0 +1,202 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + HUGEPAGE - 5 * PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_eager}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { 
+ bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t 
defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall_thp_always) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.25; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + /* Fake that system is in thp_always mode */ + system_thp_mode_t old_mode = init_system_thp_mode; + init_system_thp_mode = system_thp_mode_always; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE); + expect_true(hpdata_huge_get(ps), + "Page should be huge because thp=always and hugify_style is none"); + + ndefer_hugify_calls = 0; + ndefer_purge_calls = 0; + 
hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "style=none, no syscall"); + expect_zu_eq(ndefer_dehugify_calls, 0, "style=none, no syscall"); + expect_zu_eq(ndefer_purge_calls, 1, "purge should happen"); + + destroy_test_data(shard); + init_system_thp_mode = old_mode; +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_hpa_hugify_style_none_huge_no_syscall_thp_always); +} diff --git a/test/unit/hpa_thp_always.sh b/test/unit/hpa_thp_always.sh new file mode 100644 index 00000000..8b93006d --- /dev/null +++ b/test/unit/hpa_thp_always.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:true" diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh index c5d66afa..35d7e6b6 100644 --- a/test/unit/hpa_vectorized_madvise.sh +++ b/test/unit/hpa_vectorized_madvise.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="process_madvise_max_batch:2" +export MALLOC_CONF="process_madvise_max_batch:2,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d1974e0f..2415fda1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -300,6 +300,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort_conf, always); TEST_MALLCTL_OPT(bool, cache_oblivious, always); TEST_MALLCTL_OPT(bool, trust_madvise, always); + TEST_MALLCTL_OPT( + bool, experimental_hpa_start_huge_if_thp_always, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); From 6d4611197e62285ae69fd0237e6b3a29494213c0 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 21 Oct 2025 18:53:35 -0700 Subject: [PATCH 344/395] move fill/flush pointer array out of tcache.c --- include/jemalloc/internal/arena_externs.h | 17 +- include/jemalloc/internal/arena_types.h | 3 +- include/jemalloc/internal/cache_bin.h | 12 + 
src/arena.c | 378 ++++++++++++++++++++- src/tcache.c | 384 ++-------------------- 5 files changed, 404 insertions(+), 390 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 39d2099d..cf191aeb 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -61,13 +61,13 @@ bool arena_decay_ms_set( ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay( tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); -void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max); +uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); +void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, + szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); @@ -84,6 +84,9 @@ void arena_dalloc_bin_locked_handle_newly_empty( void arena_dalloc_bin_locked_handle_newly_nonempty( tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, + cache_bin_ptr_array_t *arr, unsigned nflush, bool small, + arena_t *stats_arena, cache_bin_stats_t merge_stats); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, 
size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 7ed2b968..c586164f 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,8 +12,7 @@ /* Maximum length of the arena name. */ #define ARENA_NAME_LEN 32 -typedef struct arena_decay_s arena_decay_t; -typedef struct arena_s arena_t; +typedef struct arena_s arena_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. */ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 08ee0d6a..bea3a2fc 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -690,6 +690,10 @@ cache_bin_finish_fill( nfilled * sizeof(void *)); } bin->stack_head = empty_position - nfilled; + /* Reset the bin stats as it's merged during fill. */ + if (config_stats) { + bin->tstats.nrequests = 0; + } } /* @@ -711,6 +715,10 @@ cache_bin_finish_flush( bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); + /* Reset the bin stats as it's merged during flush. */ + if (config_stats) { + bin->tstats.nrequests = 0; + } } static inline void @@ -731,6 +739,10 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin) { /* Reset the bin local full position. */ bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; assert(cache_bin_nstashed_get_local(bin) == 0); + /* Reset the bin stats as it's merged during flush. 
*/ + if (config_stats) { + bin->tstats.nrequests = 0; + } } /* diff --git a/src/arena.c b/src/arena.c index 224a9b63..664ed6a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -693,7 +693,7 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; - slab = edata_list_active_first(&bin->slabs_full)) { + slab = edata_list_active_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -799,7 +799,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); for (edata_t *edata = edata_list_active_first(&arena->large); - edata != NULL; edata = edata_list_active_first(&arena->large)) { + edata != NULL; edata = edata_list_active_first(&arena->large)) { void *ptr = edata_base_get(edata); size_t usize; @@ -1052,18 +1052,13 @@ arena_bin_choose( return arena_get_bin(arena, binind, binshard); } -void -arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, - szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max) { - assert(cache_bin_ncached_get_local(cache_bin) == 0); +cache_bin_sz_t +arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, + cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats) { assert(nfill_min > 0 && nfill_min <= nfill_max); - assert(nfill_max <= cache_bin_ncached_max_get(cache_bin)); const bin_info_t *bin_info = &bin_infos[binind]; - - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); - cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. 
After both are exhausted, new slabs will be allocated through @@ -1115,7 +1110,7 @@ label_refill: } arena_slab_reg_alloc_batch( - slabcur, bin_info, cnt, &ptrs.ptr[filled]); + slabcur, bin_info, cnt, &arr->ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1153,10 +1148,9 @@ label_refill: if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; - bin->stats.nrequests += cache_bin->tstats.nrequests; + bin->stats.nrequests += merge_stats.nrequests; bin->stats.curregs += filled; bin->stats.nfills++; - cache_bin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); @@ -1184,8 +1178,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(cache_bin, &ptrs, filled); arena_decay_tick(tsdn, arena); + return filled; } size_t @@ -1472,6 +1466,357 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) { arena_decay_tick(tsdn, arena); } +static const void * +arena_ptr_array_flush_ptr_getter(void *arr_ctx, size_t ind) { + cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; + return arr->ptr[ind]; +} + +static void +arena_ptr_array_flush_metadata_visitor( + void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { + size_t *szind_sum = (size_t *)szind_sum_ctx; + *szind_sum -= alloc_ctx->szind; + util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); +} + +JEMALLOC_NOINLINE static void +arena_ptr_array_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, + size_t nptrs, emap_batch_lookup_result_t *edatas) { + bool found_mismatch = false; + for (size_t i = 0; i < nptrs; i++) { + szind_t true_szind = edata_szind_get(edatas[i].edata); + if (true_szind != szind) { + found_mismatch = true; + safety_check_fail_sized_dealloc( + /* current_dealloc */ false, + /* ptr */ arena_ptr_array_flush_ptr_getter(arr, i), + /* true_size */ sz_index2size(true_szind), + /* input_size */ sz_index2size(szind)); + } + } + assert(found_mismatch); +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t 
binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * The slabs where we freed the last remaining object in the slab (and + * so need to free the slab itself). + * Used only if small == true. + */ + unsigned dalloc_count = 0; + VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + /* + * We maintain the invariant that all edatas yet to be flushed are + * contained in the half-open range [flush_start, flush_end). We'll + * repeatedly partition the array so that the unflushed items are at the + * end. + */ + unsigned flush_start = 0; + + while (flush_start < nflush) { + /* + * After our partitioning step, all objects to flush will be in + * the half-open range [prev_flush_start, flush_start), and + * flush_start will be updated to correspond to the next loop + * iteration. + */ + unsigned prev_flush_start = flush_start; + + edata_t *cur_edata = item_edata[flush_start].edata; + unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + unsigned cur_binshard = edata_binshard_get(cur_edata); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); + assert(cur_binshard < bin_infos[binind].n_shards); + /* + * Start off the partition; item_edata[i] always matches itself + * of course. 
+ */ + flush_start++; + for (unsigned i = flush_start; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + assert( + (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert( + (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + if (edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard) { + /* Swap the edatas. */ + emap_batch_lookup_result_t temp_edata = + item_edata[flush_start]; + item_edata[flush_start] = item_edata[i]; + item_edata[i] = temp_edata; + /* Swap the pointers */ + void *temp_ptr = arr->ptr[flush_start]; + arr->ptr[flush_start] = arr->ptr[i]; + arr->ptr[i] = temp_ptr; + flush_start++; + } + } + /* Make sure we implemented partitioning correctly. */ + if (config_debug) { + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + edata_t *edata = item_edata[i].edata; + unsigned arena_ind = edata_arena_ind_get(edata); + assert(arena_ind == cur_arena_ind); + unsigned binshard = edata_binshard_get(edata); + assert(binshard == cur_binshard); + } + for (unsigned i = flush_start; i < nflush; i++) { + edata_t *edata = item_edata[i].edata; + assert( + edata_arena_ind_get(edata) != cur_arena_ind + || edata_binshard_get(edata) + != cur_binshard); + } + } + + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); + + /* + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + } + + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; + } + } + + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + + arena_decay_ticks( + tsdn, cur_arena, flush_start - prev_flush_start); + } + + /* Handle all deferred slab dalloc. */ + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + + if (config_stats && *merge_stats != NULL) { + /* + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. + */ + bin_t *bin = arena_bin_choose(tsdn, stats_arena, binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + malloc_mutex_unlock(tsdn, &bin->lock); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_large(tsdn_t *tsdn, szind_t binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. 
*/ + edata_t *edata = item_edata[0].edata; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + if (!arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, + &stats_arena->stats, binind, + (*merge_stats)->nrequests); + *merge_stats = NULL; + } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + + if (edata_arena_ind_get(edata) == cur_arena_ind) { + large_dalloc_prep_locked(tsdn, edata); + } + } + if (!arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + if (edata_arena_ind_get(edata) != cur_arena_ind) { + /* + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. + */ + arr->ptr[ndeferred] = ptr; + item_edata[ndeferred].edata = edata; + ndeferred++; + continue; + } + if (large_dalloc_safety_checks( + edata, ptr, sz_index2size(binind))) { + /* See the comment in isfree. 
*/ + continue; + } + large_dalloc_finish(tsdn, edata); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); + nflush = ndeferred; + } + + if (config_stats && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, &stats_arena->stats, + binind, (*merge_stats)->nrequests); + *merge_stats = NULL; + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl(tsd_t *tsd, szind_t binind, + cache_bin_ptr_array_t *arr, unsigned nflush, bool small, + arena_t *stats_arena, cache_bin_stats_t **merge_stats) { + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). + */ + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); + /* + * This gets compiled away when config_opt_safety_checks is false. + * Checks for sized deallocation bugs, failing early rather than + * corrupting metadata. + */ + size_t szind_sum = binind * nflush; + emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, + &arena_ptr_array_flush_ptr_getter, (void *)arr, + &arena_ptr_array_flush_metadata_visitor, (void *)&szind_sum, + item_edata); + if (config_opt_safety_checks && unlikely(szind_sum != 0)) { + arena_ptr_array_flush_size_check_fail( + arr, binind, nflush, item_edata); + } + + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + return arena_ptr_array_flush_impl_small(tsdn, binind, arr, + item_edata, nflush, stats_arena, merge_stats); + } else { + return arena_ptr_array_flush_impl_large(tsdn, binind, arr, + item_edata, nflush, stats_arena, merge_stats); + } +} + +/* + * In practice, pointers are flushed back to their original allocation arenas, + * so multiple arenas may be involved here. The input stats_arena simply + * indicates where the cache stats should be merged into. + */ +void +arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr, + unsigned nflush, bool small, arena_t *stats_arena, + cache_bin_stats_t merge_stats) { + assert(arr != NULL && arr->ptr != NULL); + /* + * The input cache bin stats represent a snapshot taken when the pointer + * array is set up, and will be merged into the next-level bin stats. + * The original bin stats will be reset by the caller itself. + * This separation ensures that each layer operates independently and + * does not modify another layer's data directly. 
+ */ + cache_bin_stats_t *stats = &merge_stats; + unsigned nflush_batch, nflushed = 0; + cache_bin_ptr_array_t ptrs_batch; + do { + nflush_batch = nflush - nflushed; + if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { + nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; + } + assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); + (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; + (&ptrs_batch)->ptr = arr->ptr + nflushed; + arena_ptr_array_flush_impl(tsd, binind, &ptrs_batch, + nflush_batch, small, stats_arena, &stats); + nflushed += nflush_batch; + } while (nflushed < nflush); + assert(nflush == nflushed); + assert((arr->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); + if (config_stats) { + assert(stats == NULL); + } +} + bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize) { @@ -1890,7 +2235,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { /* Make sure that b0 thp auto-switch won't happen concurrently here. */ malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp - && metadata_thp_enabled() && (opt_thp == thp_mode_do_nothing) + && metadata_thp_enabled() + && (opt_thp == thp_mode_do_nothing) && (init_system_thp_mode == system_thp_mode_madvise); (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; diff --git a/src/tcache.c b/src/tcache.c index 2d73237b..74ff4718 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -601,15 +601,26 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); + assert(cache_bin_ncached_get_local(cache_bin) == 0); cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) >> tcache_nfill_small_lg_div_get(tcache_slow, binind); if (nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, - /* nfill_min */ - opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, - /* nfill_max */ nfill); + cache_bin_sz_t nfill_min = opt_experimental_tcache_gc + ? ((nfill >> 1) + 1) + : nfill; + cache_bin_sz_t nfill_max = nfill; + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); + + cache_bin_sz_t filled = arena_ptr_array_fill_small(tsdn, arena, binind, + &ptrs, /* nfill_min */ nfill_min, /* nfill_max */ nfill_max, + cache_bin->tstats); + cache_bin_finish_fill(cache_bin, &ptrs, filled); + assert(filled >= nfill_min && filled <= nfill_max); + assert(cache_bin_ncached_get_local(cache_bin) == filled); + tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -617,363 +628,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -static const void * -tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { - cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; - return arr->ptr[ind]; -} - -static void -tcache_bin_flush_metadata_visitor( - void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { - size_t *szind_sum = (size_t *)szind_sum_ctx; - *szind_sum -= alloc_ctx->szind; - util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); -} - -JEMALLOC_NOINLINE static void -tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, - size_t nptrs, emap_batch_lookup_result_t *edatas) { - bool found_mismatch = false; - for (size_t i = 0; i < nptrs; i++) { - szind_t true_szind = edata_szind_get(edatas[i].edata); - if (true_szind != szind) { - found_mismatch = true; - safety_check_fail_sized_dealloc( - /* current_dealloc */ false, - /* ptr */ tcache_bin_flush_ptr_getter(arr, i), - /* true_size */ sz_index2size(true_szind), - /* input_size */ sz_index2size(szind)); - } - } - assert(found_mismatch); -} - -static void -tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, - szind_t binind, 
size_t nflush, emap_batch_lookup_result_t *edatas) { - /* - * This gets compiled away when config_opt_safety_checks is false. - * Checks for sized deallocation bugs, failing early rather than - * corrupting metadata. - */ - size_t szind_sum = binind * nflush; - emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, - &tcache_bin_flush_ptr_getter, (void *)arr, - &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); - if (config_opt_safety_checks && unlikely(szind_sum != 0)) { - tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < SC_NBINS); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * The slabs where we freed the last remaining object in the slab (and - * so need to free the slab itself). - * Used only if small == true. - */ - unsigned dalloc_count = 0; - VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. - */ - bool merged_stats = false; - /* - * We maintain the invariant that all edatas yet to be flushed are - * contained in the half-open range [flush_start, flush_end). 
We'll - * repeatedly partition the array so that the unflushed items are at the - * end. - */ - unsigned flush_start = 0; - - while (flush_start < nflush) { - /* - * After our partitioning step, all objects to flush will be in - * the half-open range [prev_flush_start, flush_start), and - * flush_start will be updated to correspond to the next loop - * iteration. - */ - unsigned prev_flush_start = flush_start; - - edata_t *cur_edata = item_edata[flush_start].edata; - unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - unsigned cur_binshard = edata_binshard_get(cur_edata); - bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); - assert(cur_binshard < bin_infos[binind].n_shards); - /* - * Start off the partition; item_edata[i] always matches itself - * of course. - */ - flush_start++; - for (unsigned i = flush_start; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - assert( - (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); - assert( - (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); - if (edata_arena_ind_get(edata) == cur_arena_ind - && edata_binshard_get(edata) == cur_binshard) { - /* Swap the edatas. */ - emap_batch_lookup_result_t temp_edata = - item_edata[flush_start]; - item_edata[flush_start] = item_edata[i]; - item_edata[i] = temp_edata; - /* Swap the pointers */ - void *temp_ptr = ptrs->ptr[flush_start]; - ptrs->ptr[flush_start] = ptrs->ptr[i]; - ptrs->ptr[i] = temp_ptr; - flush_start++; - } - } - /* Make sure we implemented partitioning correctly. 
*/ - if (config_debug) { - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - edata_t *edata = item_edata[i].edata; - unsigned arena_ind = edata_arena_ind_get(edata); - assert(arena_ind == cur_arena_ind); - unsigned binshard = edata_binshard_get(edata); - assert(binshard == cur_binshard); - } - for (unsigned i = flush_start; i < nflush; i++) { - edata_t *edata = item_edata[i].edata; - assert( - edata_arena_ind_get(edata) != cur_arena_ind - || edata_binshard_get(edata) - != cur_binshard); - } - } - - /* Actually do the flushing. */ - malloc_mutex_lock(tsdn, &cur_bin->lock); - - /* - * Flush stats first, if that was the right lock. Note that we - * don't actually have to flush stats into the current thread's - * binshard. Flushing into any binshard in the same arena is - * enough; we don't expose stats on per-binshard basis (just - * per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - - /* Next flush objects. */ - /* Init only to avoid used-uninitialized warning. */ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; - } - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - - arena_decay_ticks( - tsdn, cur_arena, flush_start - prev_flush_start); - } - - /* Handle all deferred slab dalloc. 
*/ - for (unsigned i = 0; i < dalloc_count; i++) { - edata_t *slab = dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - - if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ - bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); - malloc_mutex_lock(tsdn, &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < tcache_nbins_get(tcache_slow)); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. - */ - bool merged_stats = false; - while (nflush > 0) { - /* Lock the arena, or bin, associated with the first object. 
*/ - edata_t *edata = item_edata[0].edata; - unsigned cur_arena_ind = edata_arena_ind_get(edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - if (!arena_is_auto(cur_arena)) { - malloc_mutex_lock(tsdn, &cur_arena->large_mtx); - } - - /* - * If we acquired the right lock and have some stats to flush, - * flush them. - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } - - /* - * Large allocations need special prep done. Afterwards, we can - * drop the large lock. - */ - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == cur_arena_ind) { - large_dalloc_prep_locked(tsdn, edata); - } - } - if (!arena_is_auto(cur_arena)) { - malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); - } - - /* Deallocate whatever we can. */ - unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - if (edata_arena_ind_get(edata) != cur_arena_ind) { - /* - * The object was allocated either via a - * different arena, or a different bin in this - * arena. Either way, stash the object so that - * it can be handled in a future pass. - */ - ptrs->ptr[ndeferred] = ptr; - item_edata[ndeferred].edata = edata; - ndeferred++; - continue; - } - if (large_dalloc_safety_checks( - edata, ptr, sz_index2size(binind))) { - /* See the comment in isfree. 
*/ - continue; - } - large_dalloc_finish(tsdn, edata); - } - arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); - nflush = ndeferred; - } - - if (config_stats && !merged_stats) { - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - assert(ptrs != NULL && ptrs->ptr != NULL); - unsigned nflush_batch, nflushed = 0; - cache_bin_ptr_array_t ptrs_batch; - do { - nflush_batch = nflush - nflushed; - if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { - nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; - } - assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); - (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; - (&ptrs_batch)->ptr = ptrs->ptr + nflushed; - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. 
- */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } - nflushed += nflush_batch; - } while (nflushed < nflush); - assert(nflush == nflushed); - assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); -} - JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { @@ -1001,8 +655,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nflush, small); + arena_ptr_array_flush(tsd, binind, &ptrs, nflush, small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush(cache_bin, &ptrs, nflush); } @@ -1054,8 +708,8 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_init_ptr_array_for_stashed( cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); + arena_ptr_array_flush(tsd, binind, &ptrs, nstashed, is_small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush_stashed(cache_bin); assert(cache_bin_nstashed_get_local(cache_bin) == 0); From 47aeff1d08806deb4ea8f91535f5470d7de89915 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 19 Nov 2025 14:25:58 -0800 Subject: [PATCH 345/395] Add experimental_enforce_hugify --- .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 61 +++++++------- src/hpa.c | 10 ++- src/jemalloc.c | 6 +- src/stats.c | 3 +- test/unit/hpa.c | 83 ++++++++++++++++++- test/unit/mallctl.c | 17 ++-- 7 files changed, 133 insertions(+), 48 deletions(-) diff --git 
a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index a319dc81..ea739ea8 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -16,6 +16,7 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_experimental_hpa_start_huge_if_thp_always; +extern bool opt_experimental_hpa_enforce_hugify; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/src/ctl.c b/src/ctl.c index d3443a13..3e65e23f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -99,6 +99,7 @@ CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always) +CTL_PROTO(opt_experimental_hpa_enforce_hugify) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -467,6 +468,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("experimental_hpa_start_huge_if_thp_always"), CTL(opt_experimental_hpa_start_huge_if_thp_always)}, + {NAME("experimental_hpa_enforce_hugify"), + CTL(opt_experimental_hpa_enforce_hugify)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, @@ -1108,30 +1111,30 @@ ctl_arena_stats_sdmerge( } ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.npurge, + .decay_dirty.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.nmadvise, + .decay_dirty.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .nmadvise); + .nmadvise); 
ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.purged, + .decay_dirty.purged, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .purged); + .purged); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.npurge, + .decay_muzzy.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.nmadvise, + .decay_muzzy.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .nmadvise); + .nmadvise); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.purged, + .decay_muzzy.purged, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .purged); + .purged); #define OP(mtx) \ malloc_mutex_prof_merge( \ @@ -1390,7 +1393,7 @@ ctl_refresh(tsdn_t *tsdn) { background_thread_lock); } else { memset(&ctl_stats->mutex_prof_data - [global_prof_mutex_background_thread], + [global_prof_mutex_background_thread], 0, sizeof(mutex_prof_data_t)); } /* We own ctl mutex already. */ @@ -2136,6 +2139,8 @@ CTL_RO_NL_GEN( CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always, opt_experimental_hpa_start_huge_if_thp_always, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_enforce_hugify, + opt_experimental_hpa_enforce_hugify, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) /* HPA options. 
*/ @@ -3770,35 +3775,29 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_base, diff --git a/src/hpa.c b/src/hpa.c index 3687e6ea..f6d46b25 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -29,6 +29,7 @@ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; +bool opt_experimental_hpa_enforce_hugify = false; bool hpa_hugepage_size_exceeds_limit(void) { @@ -430,7 +431,7 @@ hpa_update_purge_hugify_eligibility( /* Assume it is huge without the need to madvise */ hpa_assume_huge(tsdn, shard, ps); } - if (hpa_is_hugify_lazy(shard) + if ((hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) && hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; @@ -538,8 +539,9 @@ hpa_purge_actual_unlocked( static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { - return hpa_is_hugify_lazy(shard) && hpdata_huge_get(ps) - && !hpdata_empty(ps); + return (hpa_is_hugify_lazy(shard) + || opt_experimental_hpa_enforce_hugify) + && hpdata_huge_get(ps) && !hpdata_empty(ps); } /* Prepare purge of one page. Return number of dirty regular pages on it @@ -736,7 +738,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { * what user believes is the truth on the target system, but we won't * update nhugifies stat as system call is not being made. 
*/ - if (hpa_is_hugify_lazy(shard)) { + if (hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) { malloc_mutex_unlock(tsdn, &shard->mtx); bool err = shard->central->hooks.hugify( hpdata_addr_get(to_hugify), HUGEPAGE, diff --git a/src/jemalloc.c b/src/jemalloc.c index 0f6ff0c3..6844da5a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1305,6 +1305,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL( opt_experimental_hpa_start_huge_if_thp_always, "experimental_hpa_start_huge_if_thp_always") + CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, + "experimental_hpa_enforce_hugify") CONF_HANDLE_BOOL( opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { @@ -1554,7 +1556,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int m = percpu_arena_mode_names_base; - m < percpu_arena_mode_names_limit; m++) { + m < percpu_arena_mode_names_limit; m++) { if (strncmp(percpu_arena_mode_names[m], v, vlen) == 0) { @@ -1651,7 +1653,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("hpa_hugify_style", k, klen) == 0) { bool match = false; for (int m = 0; m < hpa_hugify_style_limit; - m++) { + m++) { if (strncmp(hpa_hugify_style_names[m], v, vlen) == 0) { diff --git a/src/stats.c b/src/stats.c index 4e04336e..2ccac6c9 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1115,7 +1115,7 @@ stats_arena_mutexes_print( CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena( @@ -1605,6 +1605,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always") 
+ OPT_WRITE_BOOL("experimental_hpa_enforce_hugify") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 0398e21a..5937601e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -286,7 +286,7 @@ TEST_BEGIN(test_stress) { size_t ntreenodes = 0; for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; - contents = mem_tree_next(&tree, contents)) { + contents = mem_tree_next(&tree, contents)) { ntreenodes++; node_check(&tree, contents); } @@ -1441,6 +1441,84 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { } TEST_END +TEST_BEGIN(test_experimental_hpa_enforce_hugify) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + bool old_opt_value = opt_experimental_hpa_enforce_hugify; + opt_experimental_hpa_enforce_hugify = true; + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + /* Use eager so hugify would normally not be made on threshold */ + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.hugify_style = hpa_hugify_style_eager; + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + opts.min_purge_interval_ms = 0; + opts.hugification_threshold = 0.9 * HUGEPAGE; + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES * 95 / 100 }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, 
&deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "Page was already huge"); + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + /* Deallocate half to trigger purge */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + /* + * Enforce hugify should have triggered dehugify syscall during purge + * when the page is huge and not empty. + */ + expect_zu_ge(ndefer_dehugify_calls, 1, + "Should have triggered dehugify syscall with eager style"); + + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 1, ""); + + opt_experimental_hpa_enforce_hugify = old_opt_value; + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -1464,5 +1542,6 @@ main(void) { test_assume_huge_purge_fully, test_eager_with_purge_threshold, test_delay_when_not_allowed_deferral, test_deferred_until_time, test_eager_no_hugify_on_threshold, - test_hpa_hugify_style_none_huge_no_syscall); + test_hpa_hugify_style_none_huge_no_syscall, + test_experimental_hpa_enforce_hugify); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 2415fda1..f409f687 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -302,6 +302,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT( bool, experimental_hpa_start_huge_if_thp_always, always); + TEST_MALLCTL_OPT(bool, experimental_hpa_enforce_hugify, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, 
always); TEST_MALLCTL_OPT(bool, retain, always); @@ -616,8 +617,8 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arena.0.dirty_decay_ms", @@ -649,8 +650,8 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", @@ -869,8 +870,8 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { 0, "Expected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arenas.dirty_decay_ms", @@ -902,8 +903,8 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { 0, "Expected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arenas.muzzy_decay_ms", From 355774270dc41a66e38565b4c5573fd53a8c090f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 08:36:19 -0700 Subject: [PATCH 346/395] [EASY] Encapsulate better, do not pass hpa_shard when hooks are enough, move shard independent actions to hpa_utils --- Makefile.in | 1 + include/jemalloc/internal/hpa_utils.h | 74 
++++++++++++---- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 6 ++ src/hpa.c | 87 +------------------ src/hpa_utils.c | 33 +++++++ test/unit/hpa_vectorized_madvise.c | 71 +-------------- .../unit/hpa_vectorized_madvise_large_batch.c | 71 ++++++++++++++- 14 files changed, 198 insertions(+), 175 deletions(-) create mode 100644 src/hpa_utils.c diff --git a/Makefile.in b/Makefile.in index c63e6f8f..4dd4ce85 100644 --- a/Makefile.in +++ b/Makefile.in @@ -124,6 +124,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ + $(srcroot)src/hpa_utils.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h index 53bcb670..6b006cff 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -2,8 +2,20 @@ #define JEMALLOC_INTERNAL_HPA_UTILS_H #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/extent.h" #define HPA_MIN_VAR_VEC_SIZE 8 +/* + * This is used for jemalloc internal tuning and may change in the future based + * on production traffic. + * + * This value protects two things: + * 1. Stack size + * 2. Number of huge pages that are being purged in a batch as we do not + * allow allocations while making madvise syscall. 
+ */ +#define HPA_PURGE_BATCH_MAX 16 + #ifdef JEMALLOC_HAVE_PROCESS_MADVISE typedef struct iovec hpa_io_vector_t; #else @@ -13,27 +25,35 @@ typedef struct { } hpa_io_vector_t; #endif +static inline size_t +hpa_process_madvise_max_iovec_len(void) { + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 + ? HPA_MIN_VAR_VEC_SIZE + : opt_process_madvise_max_batch; +} + /* Actually invoke hooks. If we fail vectorized, use single purges */ static void hpa_try_vectorized_purge( - hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + hpa_hooks_t *hooks, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + && !hooks->vectorized_purge(vec, vlen, nbytes); if (!success) { /* On failure, it is safe to purge again (potential perf - * penalty) If kernel can tell exactly which regions - * failed, we could avoid that penalty. - */ + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge( - vec[i].iov_base, vec[i].iov_len); + hooks->purge(vec[i].iov_base, vec[i].iov_len); } } } /* - * This struct accumulates the regions for process_madvise. - * It invokes the hook when batch limit is reached + * This structure accumulates the regions for process_madvise. It invokes the + * hook when batch limit is reached. 
*/ typedef struct { hpa_io_vector_t *vp; @@ -51,16 +71,16 @@ hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { } static inline void -hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { assert(ra->total_bytes > 0 && ra->cur > 0); - hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + hpa_try_vectorized_purge(hooks, ra->vp, ra->cur, ra->total_bytes); ra->cur = 0; ra->total_bytes = 0; } static inline void hpa_range_accum_add( - hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_hooks_t *hooks) { assert(ra->cur < ra->capacity); ra->vp[ra->cur].iov_base = addr; @@ -69,14 +89,14 @@ hpa_range_accum_add( ra->cur++; if (ra->cur == ra->capacity) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } static inline void -hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { if (ra->cur > 0) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } @@ -114,4 +134,28 @@ struct hpa_purge_batch_s { size_t npurged_hp_total; }; +static inline bool +hpa_batch_full(hpa_purge_batch_t *b) { + /* It's okay for ranges to go above */ + return b->npurged_hp_total == b->max_hp + || b->item_cnt == b->items_capacity + || b->nranges >= b->range_watermark; +} + +static inline void +hpa_batch_pass_start(hpa_purge_batch_t *b) { + b->item_cnt = 0; + b->nranges = 0; + b->ndirty_in_batch = 0; +} + +static inline bool +hpa_batch_empty(hpa_purge_batch_t *b) { + return b->item_cnt == 0; +} + +/* Purge pages in a batch using given hooks */ +void hpa_purge_batch( + hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz); + #endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 
fff77a4b..abdeb7b7 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -380,4 +381,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 53d4af8d..1f39cb91 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 10514d35..0b1e1707 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files 
@@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index cda827be..54462516 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/src/hpa.c b/src/hpa.c index f6d46b25..5e3727a1 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -473,70 +473,6 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } -/* - * This is used for jemalloc internal tuning and may change in the - * future based on production traffic. - * - * This value protects two things: - * 1. Stack size - * 2. Number of huge pages that are being purged in a batch as - * we do not allow allocations while making madvise syscall. - */ -#define HPA_PURGE_BATCH_MAX_DEFAULT 16 - -#ifndef JEMALLOC_JET -# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT -#else -size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; -size_t -hpa_purge_max_batch_size_for_test_set(size_t new_size) { - size_t old_size = hpa_purge_max_batch_size_for_test; - hpa_purge_max_batch_size_for_test = new_size; - return old_size; -} -# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test -#endif - -static inline size_t -hpa_process_madvise_max_iovec_len(void) { - assert( - opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); - return opt_process_madvise_max_batch == 0 - ? 
HPA_MIN_VAR_VEC_SIZE - : opt_process_madvise_max_batch; -} - -static inline void -hpa_purge_actual_unlocked( - hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) { - assert(batch_sz > 0); - - size_t len = hpa_process_madvise_max_iovec_len(); - VARIABLE_ARRAY(hpa_io_vector_t, vec, len); - - hpa_range_accum_t accum; - hpa_range_accum_init(&accum, vec, len); - - for (size_t i = 0; i < batch_sz; ++i) { - /* Actually do the purging, now that the lock is dropped. */ - if (batch[i].dehugify) { - shard->central->hooks.dehugify( - hpdata_addr_get(batch[i].hp), HUGEPAGE); - } - void *purge_addr; - size_t purge_size; - size_t total_purged_on_one_hp = 0; - while (hpdata_purge_next( - batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { - total_purged_on_one_hp += purge_size; - assert(total_purged_on_one_hp <= HUGEPAGE); - hpa_range_accum_add( - &accum, purge_addr, purge_size, shard); - } - } - hpa_range_accum_finish(&accum, shard); -} - static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { return (hpa_is_hugify_lazy(shard) @@ -624,26 +560,6 @@ hpa_purge_finish_hp( psset_update_end(&shard->psset, hp_item->hp); } -static inline bool -hpa_batch_full(hpa_purge_batch_t *b) { - /* It's okay for ranges to go above */ - return b->npurged_hp_total == b->max_hp - || b->item_cnt == b->items_capacity - || b->nranges >= b->range_watermark; -} - -static inline void -hpa_batch_pass_start(hpa_purge_batch_t *b) { - b->item_cnt = 0; - b->nranges = 0; - b->ndirty_in_batch = 0; -} - -static inline bool -hpa_batch_empty(hpa_purge_batch_t *b) { - return b->item_cnt == 0; -} - /* Returns number of huge pages purged. 
*/ static inline size_t hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { @@ -679,8 +595,9 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { if (hpa_batch_empty(&batch)) { break; } + hpa_hooks_t *hooks = &shard->central->hooks; malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt); + hpa_purge_batch(hooks, batch.items, batch.item_cnt); malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ diff --git a/src/hpa_utils.c b/src/hpa_utils.c new file mode 100644 index 00000000..59bb0d1f --- /dev/null +++ b/src/hpa_utils.c @@ -0,0 +1,33 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_utils.h" + +void +hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) { + assert(batch_sz > 0); + + size_t len = hpa_process_madvise_max_iovec_len(); + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + + for (size_t i = 0; i < batch_sz; ++i) { + /* Actually do the purging, now that the lock is dropped. 
*/ + if (batch[i].dehugify) { + hooks->dehugify(hpdata_addr_get(batch[i].hp), HUGEPAGE); + } + void *purge_addr; + size_t purge_size; + size_t total_purged_on_one_hp = 0; + while (hpdata_purge_next( + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { + total_purged_on_one_hp += purge_size; + assert(total_purged_on_one_hp <= HUGEPAGE); + hpa_range_accum_add( + &accum, purge_addr, purge_size, hooks); + } + } + hpa_range_accum_finish(&accum, hooks); +} diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index c66811e1..e82f0ffb 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -253,77 +253,8 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { } TEST_END -size_t hpa_purge_max_batch_size_for_test_set(size_t new_size); -TEST_BEGIN(test_more_pages_than_batch_page_size) { - test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0) - || HUGEPAGE_PAGES <= 4); - - size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - opts.min_purge_interval_ms = 0; - ndefer_vec_purge_calls = 0; - ndefer_purge_calls = 0; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - enum { NALLOCS = 8 * HUGEPAGE_PAGES }; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - 
for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc( - tsdn, &shard->pai, edatas[i], &deferred_work_generated); - } - - hpa_shard_do_deferred_work(tsdn, shard); - - /* - * Strict minimum purge interval is not set, we should purge as long as - * we have dirty pages. - */ - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - - /* We have page batch size = 1. - * we have 5 * HP active pages, 3 * HP dirty pages - * To achieve the balance of 25% max dirty we need to - * purge 2 pages. Since batch is 1 that must be 2 calls - * no matter what opt_process_madvise_max_batch is - */ - size_t nexpected = 2; - expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); - expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); - ndefer_vec_purge_calls = 0; - - hpa_purge_max_batch_size_for_test_set(old_page_batch); - - destroy_test_data(shard); -} -TEST_END - int main(void) { return test_no_reentrancy(test_vectorized_failure_fallback, - test_more_regions_purged_from_one_page, - test_more_pages_than_batch_page_size); + test_more_regions_purged_from_one_page); } diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 8e7be7c0..d542f72a 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/nstime.h" #define SHARD_IND 111 @@ -195,7 +196,75 @@ TEST_BEGIN(test_vectorized_purge) { } TEST_END +TEST_BEGIN(test_purge_more_than_one_batch_pages) { + test_skip_if(!hpa_supported() + || (opt_process_madvise_max_batch < HPA_PURGE_BATCH_MAX) + || HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + 
hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + opts.dirty_mult = FXP_INIT_PERCENT(1); + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum { NALLOCS = HPA_PURGE_BATCH_MAX * 3 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + for (int i = 0; i < HPA_PURGE_BATCH_MAX * 2 * (int)HUGEPAGE_PAGES; + i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We have page batch size = HPA_PURGE_BATCH_MAX. We have + * HPA_PURGE_BATCH_MAX active pages, 2 * HPA_PURGE_BATCH_MAX dirty. + * To achieve the balance of 1% max dirty we need to purge more than one + * batch. 
+ */ + size_t nexpected = 2; + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { - return test_no_reentrancy(test_vectorized_purge); + return test_no_reentrancy( + test_vectorized_purge, test_purge_more_than_one_batch_pages); } From 8a06b086f3b514764c1924451ec453a67444470b Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 09:37:09 -0700 Subject: [PATCH 347/395] [EASY] Extract hpa_central component from hpa source file --- Makefile.in | 1 + include/jemalloc/internal/hpa.h | 27 +--- include/jemalloc/internal/hpa_central.h | 41 ++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/hpa.c | 115 ----------------- src/hpa_central.c | 121 ++++++++++++++++++ 13 files changed, 184 insertions(+), 145 deletions(-) create mode 100644 include/jemalloc/internal/hpa_central.h create mode 100644 src/hpa_central.c diff --git a/Makefile.in b/Makefile.in index 4dd4ce85..7365a923 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,6 +123,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/san_bump.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ + $(srcroot)src/hpa_central.c \ $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpa_utils.c \ $(srcroot)src/hpdata.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 131bbb90..06567740 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,36 +6,13 @@ #include "jemalloc/internal/edata_cache.h" #include 
"jemalloc/internal/emap.h" #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -typedef struct hpa_central_s hpa_central_t; -struct hpa_central_s { - /* - * Guards expansion of eden. We separate this from the regular mutex so - * that cheaper operations can still continue while we're doing the OS - * call. - */ - malloc_mutex_t grow_mtx; - /* - * Either NULL (if empty), or some integer multiple of a - * hugepage-aligned number of hugepages. We carve them off one at a - * time to satisfy new pageslab requests. - * - * Guarded by grow_mtx. - */ - void *eden; - size_t eden_len; - /* Source for metadata. */ - base_t *base; - - /* The HPA hooks. */ - hpa_hooks_t hooks; -}; - typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* @@ -165,8 +142,6 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_central_init( - hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h new file mode 100644 index 00000000..3e0ff7da --- /dev/null +++ b/include/jemalloc/internal/hpa_central.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H +#define JEMALLOC_INTERNAL_HPA_CENTRAL_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/hpa_hooks.h" +#include "jemalloc/internal/hpdata.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd_types.h" + +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* + * Guards expansion of eden. We separate this from the regular mutex so + * that cheaper operations can still continue while we're doing the OS + * call. + */ + malloc_mutex_t grow_mtx; + /* + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. + */ + void *eden; + size_t eden_len; + /* Source for metadata. */ + base_t *base; + + /* The HPA hooks. 
*/ + hpa_hooks_t hooks; +}; + +bool hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); + +hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + uint64_t age, bool hugify_eager, bool *oom); + +#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index abdeb7b7..bfb62d78 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 1f39cb91..037eb724 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 0b1e1707..bd6595b1 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters 
b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 54462516..3f880176 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/src/hpa.c b/src/hpa.c index 5e3727a1..cc330379 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -8,8 +8,6 @@ #include "jemalloc/internal/witness.h" #include "jemalloc/internal/jemalloc_probe.h" -#define HPA_EDEN_SIZE (128 * HUGEPAGE) - static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -75,119 +73,6 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); } -bool -hpa_central_init( - hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { - /* malloc_conf processing should have filtered out these cases. 
*/ - assert(hpa_supported()); - bool err; - err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", - WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); - if (err) { - return true; - } - - central->base = base; - central->eden = NULL; - central->eden_len = 0; - central->hooks = *hooks; - return false; -} - -static hpdata_t * -hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { - return (hpdata_t *)base_alloc( - tsdn, central->base, sizeof(hpdata_t), CACHELINE); -} - -static hpdata_t * -hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - uint64_t age, bool hugify_eager, bool *oom) { - /* Don't yet support big allocations; these should get filtered out. */ - assert(size <= HUGEPAGE); - /* - * Should only try to extract from the central allocator if the local - * shard is exhausted. We should hold the grow_mtx on that shard. - */ - witness_assert_positive_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); - - malloc_mutex_lock(tsdn, ¢ral->grow_mtx); - *oom = false; - - hpdata_t *ps = NULL; - bool start_as_huge = hugify_eager - || (init_system_thp_mode == system_thp_mode_always - && opt_experimental_hpa_start_huge_if_thp_always); - - /* Is eden a perfect fit? */ - if (central->eden != NULL && central->eden_len == HUGEPAGE) { - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - hpdata_init(ps, central->eden, age, start_as_huge); - central->eden = NULL; - central->eden_len = 0; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return ps; - } - - /* - * We're about to try to allocate from eden by splitting. If eden is - * NULL, we have to allocate it too. Otherwise, we just have to - * allocate an edata_t for the new psset. - */ - if (central->eden == NULL) { - /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = central->hooks.map(HPA_EDEN_SIZE); - if (new_eden == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - if (hugify_eager) { - central->hooks.hugify( - new_eden, HPA_EDEN_SIZE, /* sync */ false); - } - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - central->hooks.unmap(new_eden, HPA_EDEN_SIZE); - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - central->eden = new_eden; - central->eden_len = HPA_EDEN_SIZE; - } else { - /* Eden is already nonempty; only need an edata for ps. */ - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - } - assert(ps != NULL); - assert(central->eden != NULL); - assert(central->eden_len > HUGEPAGE); - assert(central->eden_len % HUGEPAGE == 0); - assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - - hpdata_init(ps, central->eden, age, start_as_huge); - - char *eden_char = (char *)central->eden; - eden_char += HUGEPAGE; - central->eden = (void *)eden_char; - central->eden_len -= HUGEPAGE; - - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - - return ps; -} - bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, diff --git a/src/hpa_central.c b/src/hpa_central.c new file mode 100644 index 00000000..b4f770c2 --- /dev/null +++ b/src/hpa_central.c @@ -0,0 +1,121 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" + +#define HPA_EDEN_SIZE (128 * HUGEPAGE) + +bool +hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { + /* malloc_conf processing should have filtered out these cases. 
*/ + assert(hpa_supported()); + bool err; + err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", + WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + central->base = base; + central->eden = NULL; + central->eden_len = 0; + central->hooks = *hooks; + return false; +} + +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { + return (hpdata_t *)base_alloc( + tsdn, central->base, sizeof(hpdata_t), CACHELINE); +} + +hpdata_t * +hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + uint64_t age, bool hugify_eager, bool *oom) { + /* Don't yet support big allocations; these should get filtered out. */ + assert(size <= HUGEPAGE); + /* + * Should only try to extract from the central allocator if the local + * shard is exhausted. We should hold the grow_mtx on that shard. + */ + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); + + malloc_mutex_lock(tsdn, ¢ral->grow_mtx); + *oom = false; + + hpdata_t *ps = NULL; + bool start_as_huge = hugify_eager + || (init_system_thp_mode == system_thp_mode_always + && opt_experimental_hpa_start_huge_if_thp_always); + + /* Is eden a perfect fit? */ + if (central->eden != NULL && central->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + hpdata_init(ps, central->eden, age, start_as_huge); + central->eden = NULL; + central->eden_len = 0; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return ps; + } + + /* + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. + */ + if (central->eden == NULL) { + /* Allocate address space, bailing if we fail. 
*/ + void *new_eden = central->hooks.map(HPA_EDEN_SIZE); + if (new_eden == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + if (hugify_eager) { + central->hooks.hugify( + new_eden, HPA_EDEN_SIZE, /* sync */ false); + } + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + central->hooks.unmap(new_eden, HPA_EDEN_SIZE); + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + central->eden = new_eden; + central->eden_len = HPA_EDEN_SIZE; + } else { + /* Eden is already nonempty; only need an edata for ps. */ + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + } + assert(ps != NULL); + assert(central->eden != NULL); + assert(central->eden_len > HUGEPAGE); + assert(central->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); + + hpdata_init(ps, central->eden, age, start_as_huge); + + char *eden_char = (char *)central->eden; + eden_char += HUGEPAGE; + central->eden = (void *)eden_char; + central->eden_len -= HUGEPAGE; + + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + + return ps; +} From 0988583d7cd67cb9a5327c5e326b56d63f89cf16 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 25 Nov 2025 16:26:49 -0800 Subject: [PATCH 348/395] Add a mallctl for users to get an approximate of active bytes. 
--- src/ctl.c | 45 ++++++++++++++++++++ test/unit/stats.c | 106 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 3e65e23f..553c58ad 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -353,6 +353,7 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) +CTL_PROTO(approximate_stats_active) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) @@ -853,6 +854,10 @@ static const ctl_named_node_t stats_mutexes_node[] = { {NAME("reset"), CTL(stats_mutexes_reset)}}; #undef MUTEX_PROF_DATA_NODE +static const ctl_named_node_t approximate_stats_node[] = { + {NAME("active"), CTL(approximate_stats_active)}, +}; + static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, @@ -920,6 +925,7 @@ static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, {NAME("stats"), CHILD(named, stats)}, + {NAME("approximate_stats"), CHILD(named, approximate_stats)}, {NAME("experimental"), CHILD(named, experimental)}}; static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(named, root)}}; @@ -3756,6 +3762,45 @@ CTL_RO_CGEN(config_stats, stats_background_thread_run_interval, CTL_RO_CGEN(config_stats, stats_zero_reallocs, atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) +/* + * approximate_stats.active returns a result that is informative itself, + * but the returned value SHOULD NOT be compared against other stats retrieved. + * For instance, approximate_stats.active should not be compared against + * any stats, e.g., stats.active or stats.resident, because there is no + * guarantee in the comparison results. 
Results returned by stats.*, on the + * other hand, provides such guarantees, i.e., stats.active <= stats.resident, + * as long as epoch is called right before the queries. + */ + +static int +approximate_stats_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + size_t approximate_nactive = 0; + size_t approximate_active_bytes = 0; + + READONLY(); + + tsdn_t *tsdn = tsd_tsdn(tsd); + unsigned n = narenas_total_get(); + + for (unsigned i = 0; i < n; i++) { + arena_t *arena = arena_get(tsdn, i, false); + if (!arena) { + continue; + } + /* Accumulate nactive pages from each arena's pa_shard */ + approximate_nactive += pa_shard_nactive(&arena->pa_shard); + } + + approximate_active_bytes = approximate_nactive << LG_PAGE; + READ(approximate_active_bytes, size_t); + + ret = 0; +label_return: + return ret; +} + CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) CTL_RO_GEN( stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) diff --git a/test/unit/stats.c b/test/unit/stats.c index 26516fa8..d2719db2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/arena_structs.h" + #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) @@ -445,11 +447,113 @@ TEST_BEGIN(test_stats_tcache_bytes_large) { } TEST_END +TEST_BEGIN(test_approximate_stats_active) { + /* + * Test 1: create a manual arena that we exclusively control and use it + * to verify the values returned by pa_shard_nactive() is accurate. + * This also helps verify the correctness of approximate_stats.active + * since it simply sums the pa_shard_nactive() of all arenas. 
+ */ + tsdn_t *tsdn = tsdn_fetch(); + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Arena creation failed"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + expect_ptr_not_null(arena, "Failed to get arena"); + + size_t nactive_initial = pa_shard_nactive(&arena->pa_shard); + + /* + * Allocate a small size from this arena. Use MALLOCX_TCACHE_NONE + * to bypass tcache and ensure the allocation goes directly to the + * arena's pa_shard. + */ + size_t small_alloc_size = 128; + void *p_small = mallocx( + small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p_small, "Unexpected mallocx() failure for small"); + + size_t nactive_after_small = pa_shard_nactive(&arena->pa_shard); + /* + * For small allocations, jemalloc allocates a slab. The slab size can + * be looked up via bin_infos[szind].slab_size. The assertion allows + * for extra overhead from profiling, HPA, or sanitizer guard pages. + */ + size_t small_usize = nallocx( + small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + szind_t small_szind = sz_size2index(small_usize); + size_t expected_small_pages = bin_infos[small_szind].slab_size / PAGE; + expect_zu_ge(nactive_after_small - nactive_initial, + expected_small_pages, + "nactive increase should be at least the slab size in pages"); + + /* + * Allocate a large size from this arena. + */ + size_t large_alloc_size = SC_LARGE_MINCLASS; + void *p_large = mallocx( + large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p_large, "Unexpected mallocx() failure for large"); + + size_t nactive_after_large = pa_shard_nactive(&arena->pa_shard); + /* + * For large allocations, the increase in pa_shard_nactive should be at + * least the allocation size in pages with sz_large_pad considered. + * The assertion allows for extra overhead from profiling, HPA, or + * sanitizer guard pages. 
+ */ + size_t large_usize = nallocx( + large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + size_t expected_large_pages = (large_usize + sz_large_pad) / PAGE; + expect_zu_ge(nactive_after_large - nactive_after_small, + expected_large_pages, + "nactive increase should be at least the large allocation size in pages"); + + /* + * Deallocate both allocations and verify nactive returns to the + * original value. + */ + dallocx(p_small, MALLOCX_TCACHE_NONE); + dallocx(p_large, MALLOCX_TCACHE_NONE); + + size_t nactive_final = pa_shard_nactive(&arena->pa_shard); + expect_zu_ge(nactive_final - nactive_after_large, + expected_small_pages + expected_large_pages, + "nactive should return to original value after deallocation"); + + /* + * Test 2: allocate a large allocation in the auto arena and confirm + * that approximate_stats.active increases. Since there may be other + * allocs/dallocs going on, cannot make more accurate assertions like + * Test 1. + */ + size_t approximate_active_before = 0; + size_t approximate_active_after = 0; + sz = sizeof(size_t); + expect_d_eq(mallctl("approximate_stats.active", + (void *)&approximate_active_before, &sz, NULL, 0), + 0, "Unexpected mallctl() result"); + + void *p0 = mallocx(4 * SC_SMALL_MAXCLASS, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p0, "Unexpected mallocx() failure"); + + expect_d_eq(mallctl("approximate_stats.active", + (void *)&approximate_active_after, &sz, NULL, 0), + 0, "Unexpected mallctl() result"); + expect_zu_gt(approximate_active_after, approximate_active_before, + "approximate_stats.active should increase after the allocation"); + + free(p0); +} +TEST_END + int main(void) { return test_no_reentrancy(test_stats_summary, test_stats_large, test_stats_arenas_summary, test_stats_arenas_small, test_stats_arenas_large, test_stats_arenas_bins, test_stats_arenas_lextents, test_stats_tcache_bytes_small, - test_stats_tcache_bytes_large); + test_stats_tcache_bytes_large, test_approximate_stats_active); } From 
441e840df77b88c2fb32d07f56483097261c2f5c Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 5 Dec 2025 19:45:17 -0500 Subject: [PATCH 349/395] Add a script to generate github actions instead of Travis CI and Cirrus --- .github/workflows/freebsd-ci.yml | 66 +++ .github/workflows/linux-ci.yml | 695 +++++++++++++++++++++++++++++++ .github/workflows/macos-ci.yml | 212 ++++++++++ .github/workflows/windows-ci.yml | 155 +++++++ scripts/README_GH_ACTIONS.md | 181 ++++++++ scripts/gen_gh_actions.py | 686 ++++++++++++++++++++++++++++++ 6 files changed, 1995 insertions(+) create mode 100644 .github/workflows/freebsd-ci.yml create mode 100644 .github/workflows/linux-ci.yml create mode 100644 .github/workflows/macos-ci.yml create mode 100644 .github/workflows/windows-ci.yml create mode 100644 scripts/README_GH_ACTIONS.md create mode 100755 scripts/gen_gh_actions.py diff --git a/.github/workflows/freebsd-ci.yml b/.github/workflows/freebsd-ci.yml new file mode 100644 index 00000000..6c702d88 --- /dev/null +++ b/.github/workflows/freebsd-ci.yml @@ -0,0 +1,66 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: FreeBSD CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-freebsd: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + debug: ['--enable-debug', '--disable-debug'] + prof: ['--enable-prof', '--disable-prof'] + arch: ['64-bit', '32-bit'] + uncommon: + - '' + - '--with-lg-page=16 --with-malloc-conf=tcache:false' + + name: FreeBSD (${{ matrix.arch }}, debug=${{ matrix.debug }}, prof=${{ matrix.prof }}${{ matrix.uncommon && ', uncommon' || '' }}) + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Test on FreeBSD + uses: vmactions/freebsd-vm@v1 + with: + release: '15.0' + usesh: true + prepare: | + pkg install -y autoconf gmake + run: | + # Verify we're running in FreeBSD + echo "==== System Information ====" + uname -a + freebsd-version + echo "============================" + + # Set compiler flags for 32-bit if needed + if [ "${{ matrix.arch }}" = "32-bit" ]; then + export CC="cc -m32" + export CXX="c++ -m32" + fi + + # Generate configure script + autoconf + + # Configure with matrix options + ./configure --with-jemalloc-prefix=ci_ ${{ matrix.debug }} ${{ matrix.prof }} ${{ matrix.uncommon }} + + # Get CPU count for parallel builds + export JFLAG=$(sysctl -n kern.smp.cpus) + + gmake -j${JFLAG} + gmake -j${JFLAG} tests + gmake check + + + diff --git a/.github/workflows/linux-ci.yml b/.github/workflows/linux-ci.yml new file mode 100644 index 00000000..c5e0c9aa --- /dev/null +++ b/.github/workflows/linux-ci.yml @@ -0,0 +1,695 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: Linux CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-linux: + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds 
-Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof" + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof 
--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-prof 
--enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: 
"-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-cache-oblivious 
--enable-stats --enable-log --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-linux-arm64: + runs-on: ubuntu-24.04-arm + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + 
CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml new file mode 100644 index 00000000..585551d0 --- /dev/null +++ b/.github/workflows/macos-ci.yml @@ -0,0 +1,212 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. + +name: macOS CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-macos: + runs-on: macos-15-intel + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-macos-arm64: + runs-on: macos-15 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - 
name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/windows-ci.yml b/.github/workflows/windows-ci.yml new file mode 100644 index 00000000..f40ba086 --- /dev/null +++ b/.github/workflows/windows-ci.yml @@ -0,0 +1,155 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. + +name: Windows CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-windows: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: -fcommon + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CONFIGURE_FLAGS: --enable-debug + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + shell: cmd + run: | + echo === Windows Version === + systeminfo | findstr /B /C:"OS Name" /C:"OS Version" + ver + echo. + echo === Architecture === + echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE% + echo. 
+ + - name: Setup MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }} + update: true + install: >- + autotools + git + pacboy: >- + make:p + gcc:p + binutils:p + + - name: Build and test (MinGW-GCC) + if: matrix.env.CC != 'cl.exe' + shell: msys2 {0} + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + mingw32-make tests + + # Run tests + mingw32-make -k check + + - name: Setup MSVC environment + if: matrix.env.CC == 'cl.exe' + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }} + + - name: Build and test (MSVC) + if: matrix.env.CC == 'cl.exe' + shell: msys2 {0} + env: + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + MSYS2_PATH_TYPE: inherit + run: | + # Export MSVC environment variables for configure + export CC=cl.exe + export CXX=cl.exe + export AR=lib.exe + export NM=dumpbin.exe + export RANLIB=: + + # Verify cl.exe is accessible (should be in PATH via inherit) + if ! which cl.exe > /dev/null 2>&1; then + echo "cl.exe not found, trying to locate MSVC..." 
+ # Find and add MSVC bin directory to PATH + MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\r' | sed 's/\\\\\\\\/\//g' | sed 's/C:/\\/c/g') + if [ -n "$MSVC_BIN" ]; then + export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86" + fi + fi + + # Run autoconf + autoconf + + # Configure with MSVC + ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + # Build tests sequentially due to PDB file issues + mingw32-make tests + + # Run tests + mingw32-make -k check + + + diff --git a/scripts/README_GH_ACTIONS.md b/scripts/README_GH_ACTIONS.md new file mode 100644 index 00000000..1cb236ad --- /dev/null +++ b/scripts/README_GH_ACTIONS.md @@ -0,0 +1,181 @@ +# GitHub Actions Workflow Generator + +This directory contains `gen_gh_actions.py`, a script to generate GitHub Actions CI workflows from the same configuration logic used for Travis CI. + +## Usage + +The script can generate workflows for different platforms: + +```bash +# Generate Linux CI workflow (default) +./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml + +# Generate macOS CI workflow +./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml + +# Generate Windows CI workflow +./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml + +# Generate FreeBSD CI workflow +./scripts/gen_gh_actions.py freebsd > .github/workflows/freebsd-ci.yml + +# Generate combined workflow with all platforms +./scripts/gen_gh_actions.py all > .github/workflows/ci-all.yml +``` + +## Generated Workflows + +### Linux CI (`linux-ci.yml`) +- **test-linux** (AMD64): `ubuntu-latest` (x86_64) + - ~96 configurations covering GCC, Clang, various flags +- **test-linux-arm64** (ARM64): `ubuntu-24.04-arm` (aarch64) + - ~14 configurations including large hugepage tests + - **Note:** Free ARM64 runners (Public Preview) - may have longer queue times during peak hours + +**Total:** 110 configurations + 
+### macOS CI (`macos-ci.yml`) +- **test-macos** (Intel): `macos-15-intel` (x86_64) + - ~10 configurations with GCC compiler +- **test-macos-arm64** (Apple Silicon): `macos-latest` (arm64) + - ~11 configurations including large hugepage tests + +**Total:** 21 configurations + +### Windows CI (`windows-ci.yml`) +- **test-windows** (AMD64): `windows-latest` (x86_64) + - 10 configurations covering MinGW-GCC and MSVC compilers + - 32-bit and 64-bit builds + - Uses MSYS2 for build environment + +**Total:** 10 configurations + +### FreeBSD CI (`freebsd-ci.yml`) +- **test-freebsd** (AMD64): Runs in FreeBSD VM on `ubuntu-latest` + - Matrix testing: debug (on/off), prof (on/off), arch (32/64-bit), uncommon configs + - 16 total configuration combinations + - Uses FreeBSD 15.0 via `vmactions/freebsd-vm@v1` + - Uses `gmake` (GNU Make) instead of BSD make + +**Total:** 16 configurations + +## Architecture Verification + +Each workflow includes a "Show OS version" step that prints: + +**Linux:** +```bash +=== System Information === +uname -a # Kernel and architecture +=== Architecture === +uname -m # x86_64, aarch64, etc. 
+arch # Architecture type +=== CPU Info === +lscpu # Detailed CPU information +``` + +**macOS:** +```bash +=== macOS Version === +sw_vers # macOS version and build +=== Architecture === +uname -m # x86_64 or arm64 +arch # i386 or arm64 +=== CPU Info === +sysctl machdep.cpu.brand_string # CPU model +``` + +**Windows:** +```cmd +=== Windows Version === +systeminfo # OS name and version +ver # Windows version +=== Architecture === +PROCESSOR_ARCHITECTURE # AMD64, x86, ARM64 +``` + +## GitHub Runner Images + +| Platform | Runner Label | Architecture | OS Version | Strategy | +|----------|--------------|--------------|------------|----------| +| Linux AMD64 | ubuntu-latest | x86_64 | Ubuntu 22.04+ | Auto-update | +| Linux ARM64 | ubuntu-24.04-arm | aarch64 | Ubuntu 24.04 | Free (Public Preview) | +| macOS Intel | macos-15-intel | x86_64 | macOS 15 Sequoia | Pinned | +| macOS Apple Silicon | macos-15 | arm64 | macOS 15 Sequoia | Pinned | +| Windows | windows-latest | x86_64 | Windows Server 2022+ | Auto-update | +| FreeBSD | ubuntu-latest (VM) | x86_64 | FreeBSD 15.0 in VM | VM-based | + +### Runner Strategy Explained + +We use a **hybrid approach** to balance stability and maintenance: + +**Auto-update runners (`-latest`):** +- **Linux AMD64**: `ubuntu-latest` - Very stable, rarely breaks, auto-updates to newest Ubuntu LTS +- **Windows**: `windows-latest` - Backward compatible, auto-updates to newest Windows Server + +**Pinned runners (specific versions):** +- **Linux ARM64**: `ubuntu-24.04-arm` - **Free for public repos** (Public Preview, may have queue delays) +- **macOS Intel**: `macos-15-intel` - Last Intel macOS runner (EOL **August 2027**) +- **macOS Apple Silicon**: `macos-15` - Pin for control over macOS upgrades + +**Why this approach?** +- Reduces maintenance (auto-update where safe) +- Prevents surprise breakages (pin where needed) +- Balances stability and staying current +- Uses free ARM64 runners for public repositories + +### ARM64 Queue Times + +**If you 
experience long waits for ARM64 jobs:** + +The `ubuntu-24.04-arm` runner is **free for public repositories** but is in **Public Preview**. GitHub warns: *"you may experience longer queue times during peak usage hours"*. + +To reduce wait times we should upgrade to Team/Enterprise plan - then we could use `ubuntu-24.04-arm64` for faster, paid runners + +### Important Deprecation Timeline + +| Date | Event | Action Required | +|------|-------|------------------| +| **August 2027** | macOS Intel runners removed | Must drop Intel macOS testing or use self-hosted | +| **TBD** | ARM64 runners leave Public Preview | May see improved queue times | + +**Note:** `macos-15-intel` is the **last Intel-based macOS runner** from GitHub Actions. After August 2027, only Apple Silicon runners will be available. + +## Platform-Specific Details + +### Windows Build Process +The Windows workflow uses: +1. **MSYS2** setup via `msys2/setup-msys2@v2` action +2. **MinGW-GCC**: Standard autotools build process in MSYS2 shell +3. **MSVC (cl.exe)**: Requires `ilammy/msvc-dev-cmd@v1` for environment setup + - Uses `MSYS2_PATH_TYPE: inherit` to inherit Windows PATH + - Exports `AR=lib.exe`, `NM=dumpbin.exe`, `RANLIB=:` +4. **mingw32-make**: Used instead of `make` (standard in MSYS2) + +### macOS Build Process +- Uses Homebrew to install `autoconf` +- Tests on both Intel (x86_64) and Apple Silicon (ARM64) +- Standard autotools build process +- Excludes certain malloc configurations not supported on macOS + +### Linux Build Process +- Ubuntu Latest for AMD64, Ubuntu 24.04 for ARM64 +- Installs 32-bit cross-compilation dependencies when needed +- Most comprehensive test matrix (110 configurations) + +## Relationship to Travis CI + +This script mirrors the logic from `gen_travis.py` but generates GitHub Actions workflows instead of `.travis.yml`. The test matrices are designed to provide equivalent coverage to the Travis CI configuration. 
+ +## Regenerating Workflows + +To regenerate all workflows after modifying `gen_gh_actions.py`: + +```bash +./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml +./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml +./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml +``` + +**Note**: The generated files should not be edited by hand. All changes should be made to `gen_gh_actions.py` and then regenerated. + diff --git a/scripts/gen_gh_actions.py b/scripts/gen_gh_actions.py new file mode 100755 index 00000000..4c5474ab --- /dev/null +++ b/scripts/gen_gh_actions.py @@ -0,0 +1,686 @@ +#!/usr/bin/env python3 + +from itertools import combinations, chain +from enum import Enum, auto + + +LINUX = 'ubuntu-24.04' +OSX = 'macos-latest' +WINDOWS = 'windows-latest' +FREEBSD = 'freebsd' + +AMD64 = 'amd64' +ARM64 = 'arm64' +PPC64LE = 'ppc64le' + + +GITHUB_ACTIONS_TEMPLATE = """\ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: {name} + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: +{jobs} +""" + + +class Option(object): + class Type: + COMPILER = auto() + COMPILER_FLAG = auto() + CONFIGURE_FLAG = auto() + MALLOC_CONF = auto() + FEATURE = auto() + + def __init__(self, type, value): + self.type = type + self.value = value + + @staticmethod + def as_compiler(value): + return Option(Option.Type.COMPILER, value) + + @staticmethod + def as_compiler_flag(value): + return Option(Option.Type.COMPILER_FLAG, value) + + @staticmethod + def as_configure_flag(value): + return Option(Option.Type.CONFIGURE_FLAG, value) + + @staticmethod + def as_malloc_conf(value): + return Option(Option.Type.MALLOC_CONF, value) + + @staticmethod + def as_feature(value): + return Option(Option.Type.FEATURE, value) + + def __eq__(self, obj): + return (isinstance(obj, Option) and obj.type == self.type + and obj.value == self.value) + + def __repr__(self): + type_names = { + Option.Type.COMPILER: 'COMPILER', + Option.Type.COMPILER_FLAG: 'COMPILER_FLAG', + Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG', + Option.Type.MALLOC_CONF: 'MALLOC_CONF', + Option.Type.FEATURE: 'FEATURE' + } + return f"Option({type_names[self.type]}, {repr(self.value)})" + + +# The 'default' configuration is gcc, on linux, with no compiler or configure +# flags. We also test with clang, -m32, --enable-debug, --enable-prof, +# --disable-stats, and --with-malloc-conf=tcache:false. To avoid abusing +# CI resources though, we don't test all 2**7 = 128 possible combinations of these; +# instead, we only test combinations of up to 2 'unusual' settings, under the +# hope that bugs involving interactions of such settings are rare. 
+MAX_UNUSUAL_OPTIONS = 2 + + +GCC = Option.as_compiler('CC=gcc CXX=g++') +CLANG = Option.as_compiler('CC=clang CXX=clang++') +CL = Option.as_compiler('CC=cl.exe CXX=cl.exe') + + +compilers_unusual = [CLANG,] + + +CROSS_COMPILE_32BIT = Option.as_feature('CROSS_COMPILE_32BIT') +feature_unusuals = [CROSS_COMPILE_32BIT] + + +configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( + '--enable-debug', + '--enable-prof', + '--disable-stats', + '--disable-libdl', + '--enable-opt-safety-checks', + '--with-lg-page=16', + '--with-lg-page=16 --with-lg-hugepage=29', +)] +LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29") + + +malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( + 'tcache:false', + 'dss:primary', + 'percpu_arena:percpu', + 'background_thread:true', +)] + + +all_unusuals = (compilers_unusual + feature_unusuals + + configure_flag_unusuals + malloc_conf_unusuals) + + +def get_extra_cflags(os, compiler): + if os == WINDOWS: + # For non-CL compilers under Windows (for now it's only MinGW-GCC), + # -fcommon needs to be specified to correctly handle multiple + # 'malloc_conf' symbols and such, which are declared weak under Linux. + # Weak symbols don't work with MinGW-GCC. + if compiler != CL.value: + return ['-fcommon'] + else: + return [] + + # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if compiler == CLANG.value or os == OSX: + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == OSX: + extra_cflags += [ + '-Wno-deprecated-declarations', + ] + return extra_cflags + + +def format_env_dict(os, arch, combination): + """Format environment variables as a dictionary for the matrix.""" + compilers = [x.value for x in combination if x.type == Option.Type.COMPILER] + compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] + configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] + malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] + features = [x.value for x in combination if x.type == Option.Type.FEATURE] + + if len(malloc_conf) > 0: + configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) + + if not compilers: + compiler = GCC.value + else: + compiler = compilers[0] + + cross_compile = CROSS_COMPILE_32BIT.value in features + if os == LINUX and cross_compile: + compiler_flags.append('-m32') + + env_dict = {} + + # Parse compiler + cc_parts = compiler.split() + for part in cc_parts: + if part.startswith('CC='): + env_dict['CC'] = part.split('=')[1] + elif part.startswith('CXX='): + env_dict['CXX'] = part.split('=')[1] + + # Add features + for feature in features: + env_dict[feature] = 'yes' + + # Add flags + if compiler_flags: + env_dict['COMPILER_FLAGS'] = ' '.join(compiler_flags) + if configure_flags: + env_dict['CONFIGURE_FLAGS'] = ' '.join(configure_flags) + + extra_cflags = get_extra_cflags(os, compiler) + if extra_cflags: + env_dict['EXTRA_CFLAGS'] = ' '.join(extra_cflags) + + return env_dict + + +def generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, unusuals=all_unusuals): + """Generate matrix entries for a job.""" + entries = [] + for combination in chain.from_iterable( + [combinations(unusuals, i) for i in range(max_unusual_opts + 
1)]): + if not any(excluded in combination for excluded in exclude): + env_dict = format_env_dict(os, arch, combination) + entries.append(env_dict) + return entries + + +def generate_linux_job(arch): + """Generate Linux job configuration.""" + os = LINUX + + # Only generate 2 unusual options for AMD64 to reduce matrix size + max_unusual_opts = MAX_UNUSUAL_OPTIONS if arch == AMD64 else 1 + + exclude = [] + if arch == PPC64LE: + # Avoid 32 bit builds and clang on PowerPC + exclude = (CROSS_COMPILE_32BIT, CLANG,) + if arch == ARM64: + # Avoid 32 bit build on ARM64 + exclude = (CROSS_COMPILE_32BIT,) + + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + + linux_configure_flags = list(configure_flag_unusuals) + linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr")) + + linux_unusuals = (compilers_unusual + feature_unusuals + + linux_configure_flags + malloc_conf_unusuals) + + matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, linux_unusuals) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Select appropriate runner based on architecture + if arch == ARM64: + runner = "ubuntu-24.04-arm" # Free ARM64 runner for public repos (Public Preview) + elif arch == PPC64LE: + # GitHub doesn't provide PPC runners, would need self-hosted + runner = "self-hosted-ppc64le" + else: # AMD64 + runner = "ubuntu-24.04" # Ubuntu 24.04 LTS + + job = f""" test-linux{arch_suffix}: + runs-on: {runner} + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + # Properly escape values with special characters + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + # Add manual job entries + manual_entries = [ + { + 'CC': 'gcc', + 'CXX': 'g++', + 'CONFIGURE_FLAGS': '--enable-debug --disable-cache-oblivious --enable-stats --enable-log 
--enable-prof', + 'EXTRA_CFLAGS': '-Werror -Wno-array-bounds' + }, + { + 'CC': 'gcc', + 'CXX': 'g++', + 'CONFIGURE_FLAGS': '--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof', + 'EXTRA_CFLAGS': '-Werror -Wno-array-bounds' + } + ] + + if arch == AMD64: + for entry in manual_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{{{ matrix.env.CC }}}} + CXX: ${{{{ matrix.env.CXX }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + +""" + + return job + + +def generate_macos_job(arch): + """Generate macOS job configuration.""" + os = OSX + max_unusual_opts = 1 + + exclude = ([Option.as_malloc_conf(opt) for opt in ( + 'dss:primary', + 'background_thread:true')] + + 
[Option.as_configure_flag('--enable-prof')] + + [CLANG,]) + + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + + matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Select appropriate runner based on architecture + # Pin both for more control over OS upgrades + if arch == ARM64: + runner = "macos-15" # Pinned macOS 15 on Apple Silicon + else: # AMD64 + runner = "macos-15-intel" # Pinned macOS 15 on Intel (last Intel runner, EOL Aug 2027) + + job = f""" test-macos{arch_suffix}: + runs-on: {runner} + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - name: Build and test + env: + CC: ${{{{ matrix.env.CC || 'gcc' }}}} + CXX: ${{{{ matrix.env.CXX || 'g++' }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + +""" + + return job + + +def generate_windows_job(arch): + """Generate Windows job configuration.""" + os = WINDOWS + max_unusual_opts = 3 + unusuals = ( + 
Option.as_configure_flag('--enable-debug'), + CL, + CROSS_COMPILE_32BIT, + ) + + matrix_entries = generate_job_matrix_entries(os, arch, (), max_unusual_opts, unusuals) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Use latest for Windows - tends to be backward compatible and stable + job = f""" test-windows{arch_suffix}: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + shell: cmd + run: | + echo === Windows Version === + systeminfo | findstr /B /C:"OS Name" /C:"OS Version" + ver + echo. + echo === Architecture === + echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE% + echo. + + - name: Setup MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }}}} + update: true + install: >- + autotools + git + pacboy: >- + make:p + gcc:p + binutils:p + + - name: Build and test (MinGW-GCC) + if: matrix.env.CC != 'cl.exe' + shell: msys2 {{0}} + env: + CC: ${{{{ matrix.env.CC || 'gcc' }}}} + CXX: ${{{{ matrix.env.CXX || 'g++' }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + mingw32-make tests + + # Run tests + mingw32-make -k check + + - name: Setup MSVC environment + if: matrix.env.CC == 'cl.exe' + uses: 
ilammy/msvc-dev-cmd@v1 + with: + arch: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }}}} + + - name: Build and test (MSVC) + if: matrix.env.CC == 'cl.exe' + shell: msys2 {{0}} + env: + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + MSYS2_PATH_TYPE: inherit + run: | + # Export MSVC environment variables for configure + export CC=cl.exe + export CXX=cl.exe + export AR=lib.exe + export NM=dumpbin.exe + export RANLIB=: + + # Verify cl.exe is accessible (should be in PATH via inherit) + if ! which cl.exe > /dev/null 2>&1; then + echo "cl.exe not found, trying to locate MSVC..." + # Find and add MSVC bin directory to PATH + MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\\\r' | sed 's/\\\\\\\\\\\\\\\\/\\//g' | sed 's/C:/\\\\/c/g') + if [ -n "$MSVC_BIN" ]; then + export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86" + fi + fi + + # Run autoconf + autoconf + + # Configure with MSVC + ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + # Build tests sequentially due to PDB file issues + mingw32-make tests + + # Run tests + mingw32-make -k check + +""" + + return job + + +def generate_freebsd_job(arch): + """Generate FreeBSD job configuration.""" + # FreeBSD runs in a VM on ubuntu-latest, not native + + job = f""" test-freebsd: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + debug: ['--enable-debug', '--disable-debug'] + prof: ['--enable-prof', '--disable-prof'] + arch: ['64-bit', '32-bit'] + uncommon: + - '' + - '--with-lg-page=16 --with-malloc-conf=tcache:false' + + name: FreeBSD (${{{{ matrix.arch }}}}, debug=${{{{ matrix.debug }}}}, prof=${{{{ matrix.prof }}}}${{{{ matrix.uncommon && ', uncommon' || '' }}}}) + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Test on FreeBSD + uses: vmactions/freebsd-vm@v1 + with: + release: '15.0' + usesh: true + prepare: | + pkg install -y 
autoconf gmake + run: | + # Verify we're running in FreeBSD + echo "==== System Information ====" + uname -a + freebsd-version + echo "============================" + + # Set compiler flags for 32-bit if needed + if [ "${{{{ matrix.arch }}}}" = "32-bit" ]; then + export CC="cc -m32" + export CXX="c++ -m32" + fi + + # Generate configure script + autoconf + + # Configure with matrix options + ./configure --with-jemalloc-prefix=ci_ ${{{{ matrix.debug }}}} ${{{{ matrix.prof }}}} ${{{{ matrix.uncommon }}}} + + # Get CPU count for parallel builds + export JFLAG=$(sysctl -n kern.smp.cpus) + + gmake -j${{JFLAG}} + gmake -j${{JFLAG}} tests + gmake check + +""" + + return job + + +def main(): + import sys + + # Determine which workflow to generate based on command-line argument + workflow_type = sys.argv[1] if len(sys.argv) > 1 else 'linux' + + if workflow_type == 'linux': + jobs = '\n'.join(( + generate_linux_job(AMD64), + generate_linux_job(ARM64), + )) + print(GITHUB_ACTIONS_TEMPLATE.format(name='Linux CI', jobs=jobs)) + + elif workflow_type == 'macos': + jobs = '\n'.join(( + generate_macos_job(AMD64), # Intel x86_64 + generate_macos_job(ARM64), # Apple Silicon + )) + print(GITHUB_ACTIONS_TEMPLATE.format(name='macOS CI', jobs=jobs)) + + elif workflow_type == 'windows': + jobs = generate_windows_job(AMD64) + print(GITHUB_ACTIONS_TEMPLATE.format(name='Windows CI', jobs=jobs)) + + elif workflow_type == 'freebsd': + jobs = generate_freebsd_job(AMD64) + print(GITHUB_ACTIONS_TEMPLATE.format(name='FreeBSD CI', jobs=jobs)) + + elif workflow_type == 'all': + # Generate all workflow files + linux_jobs = '\n'.join(( + generate_linux_job(AMD64), + generate_linux_job(ARM64), + )) + macos_jobs = '\n'.join(( + generate_macos_job(AMD64), # Intel + generate_macos_job(ARM64), # Apple Silicon + )) + windows_jobs = generate_windows_job(AMD64) + freebsd_jobs = generate_freebsd_job(AMD64) + + all_jobs = '\n'.join((linux_jobs, macos_jobs, windows_jobs, freebsd_jobs)) + 
print(GITHUB_ACTIONS_TEMPLATE.format(name='CI', jobs=all_jobs)) + + else: + print(f"Unknown workflow type: {workflow_type}", file=sys.stderr) + print("Usage: gen_gh_actions.py [linux|macos|windows|freebsd|all]", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() From c7690e92da89cb08ea43a786d7e7ff5378c4d6af Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 9 Dec 2025 13:14:24 -0500 Subject: [PATCH 350/395] Remove Cirrus CI --- .cirrus.yml | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 .cirrus.yml diff --git a/.cirrus.yml b/.cirrus.yml deleted file mode 100644 index 585aa42f..00000000 --- a/.cirrus.yml +++ /dev/null @@ -1,47 +0,0 @@ -env: - CIRRUS_CLONE_DEPTH: 1 - ARCH: amd64 - -task: - matrix: - env: - DEBUG_CONFIG: --enable-debug - env: - DEBUG_CONFIG: --disable-debug - matrix: - - env: - PROF_CONFIG: --enable-prof - - env: - PROF_CONFIG: --disable-prof - matrix: - - name: 64-bit - env: - CC: - CXX: - - name: 32-bit - env: - CC: cc -m32 - CXX: c++ -m32 - matrix: - - env: - UNCOMMON_CONFIG: - - env: - UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false - matrix: - - name: 15-CURRENT - freebsd_instance: - image_family: freebsd-15-0-snap - install_script: - - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - - pkg upgrade -y - - pkg install -y autoconf gmake - script: - - autoconf - # We don't perfectly track freebsd stdlib.h definitions. This is fine when - # we count as a system header, but breaks otherwise, like during these - # tests. 
- - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - - export JFLAG=`sysctl -n kern.smp.cpus` - - gmake -j${JFLAG} - - gmake -j${JFLAG} tests - - gmake check From 6016d86c187ce01ef8cbe1c3023a3ca394c9b47f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 30 Oct 2025 16:05:04 -0700 Subject: [PATCH 351/395] [SEC] Make SEC owned by hpa_shard, simplify the code, add stats, lock per bin --- Makefile.in | 2 +- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 1 - include/jemalloc/internal/hpa.h | 19 +- include/jemalloc/internal/pa.h | 12 +- include/jemalloc/internal/pai.h | 37 - include/jemalloc/internal/sec.h | 129 +- include/jemalloc/internal/sec_opts.h | 43 +- include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/arena.c | 6 +- src/ctl.c | 31 +- src/hpa.c | 144 ++- src/jemalloc.c | 17 +- src/pa.c | 22 +- src/pa_extra.c | 10 +- src/pac.c | 8 +- src/pai.c | 32 - src/sec.c | 564 ++++----- src/stats.c | 27 +- test/unit/hpa.c | 88 +- test/unit/hpa_sec_integration.c | 239 ++++ test/unit/hpa_sec_integration.sh | 3 + test/unit/hpa_thp_always.c | 8 +- test/unit/hpa_vectorized_madvise.c | 7 +- .../unit/hpa_vectorized_madvise_large_batch.c | 8 +- test/unit/mallctl.c | 1 - test/unit/sec.c | 1043 ++++++++--------- 35 files changed, 1264 insertions(+), 1257 deletions(-) delete mode 100644 src/pai.c create mode 100644 test/unit/hpa_sec_integration.c create mode 100644 test/unit/hpa_sec_integration.sh diff --git a/Makefile.in b/Makefile.in index 7365a923..83f04e64 100644 --- a/Makefile.in +++ 
b/Makefile.in @@ -135,7 +135,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ - $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ @@ -230,6 +229,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_sec_integration.c \ $(srcroot)test/unit/hpa_thp_always.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cf191aeb..1d004635 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -46,7 +46,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + hpa_shard_stats_t *hpastats); void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index b290411b..82035fe3 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -51,7 +51,6 @@ typedef struct ctl_arena_stats_s { arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; hpa_shard_stats_t hpastats; - sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 06567740..dc7725b7 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -12,6 +12,7 @@ #include 
"jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +#include "jemalloc/internal/sec.h" typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { @@ -57,6 +58,7 @@ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; hpa_shard_nonderived_stats_t nonderived_stats; + sec_stats_t secstats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -69,14 +71,17 @@ struct hpa_shard_s { /* The central allocator we get our hugepages from. */ hpa_central_t *central; + /* Protects most of this shard's state. */ malloc_mutex_t mtx; + /* * Guards the shard's access to the central allocator (preventing * multiple threads operating on this shard from accessing the central * allocator). */ malloc_mutex_t grow_mtx; + /* The base metadata allocator. */ base_t *base; @@ -87,6 +92,9 @@ struct hpa_shard_s { */ edata_cache_fast_t ecf; + /* Small extent cache (not guarded by mtx) */ + JEMALLOC_ALIGNED(CACHELINE) sec_t sec; + psset_t psset; /* @@ -142,9 +150,9 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts); +bool hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge( @@ -157,6 +165,8 @@ void hpa_shard_stats_merge( */ void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); +/* Flush caches that shard may be using */ +void hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_set_deferral_allowed( tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed); @@ -164,8 +174,9 @@ void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); /* * We share the fork ordering with the PA and arena prefork handling; that's why - * these are 3 and 4 rather than 0 and 1. + * these are 2, 3 and 4 rather than 0 and 1. */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3f2d10b0..f3910ad8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -96,12 +96,6 @@ struct pa_shard_s { /* Allocates from a PAC. */ pac_t pac; - /* - * We place a small extent cache in front of the HPA, since we intend - * these configurations to use many fewer arenas, and therefore have a - * higher risk of hot locks. - */ - sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. 
*/ @@ -166,6 +160,9 @@ void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard); */ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); +/* Flush any caches used by shard */ +void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard); + /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, @@ -233,8 +230,7 @@ void pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident); + hpa_shard_stats_t *hpa_stats_out, size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 1d924657..9b4c257b 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,15 +13,6 @@ struct pai_s { edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); - /* - * Returns the number of extents added to the list (which may be fewer - * than requested, in case of OOM). The list should already be - * initialized. The only alignment guarantee is page-alignment, and - * the results are not necessarily zeroed. - */ - size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); @@ -29,9 +20,6 @@ struct pai_s { size_t old_size, size_t new_size, bool *deferred_work_generated); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - /* This function empties out list as a side-effect of being called. 
*/ - void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; @@ -47,14 +35,6 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, frequent_reuse, deferred_work_generated); } -static inline size_t -pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - return self->alloc_batch(tsdn, self, size, nallocs, results, - frequent_reuse, deferred_work_generated); -} - static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { @@ -75,26 +55,9 @@ pai_dalloc( self->dalloc(tsdn, self, edata, deferred_work_generated); } -static inline void -pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - self->dalloc_batch(tsdn, self, list, deferred_work_generated); -} - static inline uint64_t pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return self->time_until_deferred_work(tsdn, self); } -/* - * An implementation of batch allocation that simply calls alloc once for - * each item in the list. - */ -size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); -/* Ditto, for dalloc. */ -void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); - #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 50daf066..cc458b9d 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -17,91 +17,104 @@ * knowledge of the underlying PAI implementation). 
*/ -/* - * For now, this is just one field; eventually, we'll probably want to get more - * fine-grained data out (like per-size class statistics). - */ +typedef struct sec_bin_stats_s sec_bin_stats_t; +struct sec_bin_stats_s { + /* Number of alloc requests that did not find extent in this bin */ + size_t nmisses; + /* Number of successful alloc requests. */ + size_t nhits; + /* Number of dallocs causing the flush */ + size_t ndalloc_flush; + /* Number of dallocs not causing the flush */ + size_t ndalloc_noflush; + /* Number of fills that hit max_bytes */ + size_t noverfills; +}; typedef struct sec_stats_s sec_stats_t; struct sec_stats_s { /* Sum of bytes_cur across all shards. */ size_t bytes; + + /* Totals of bin_stats. */ + sec_bin_stats_t total; }; +static inline void +sec_bin_stats_init(sec_bin_stats_t *stats) { + stats->ndalloc_flush = 0; + stats->nmisses = 0; + stats->nhits = 0; + stats->ndalloc_noflush = 0; + stats->noverfills = 0; +} + +static inline void +sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) { + dst->nmisses += src->nmisses; + dst->nhits += src->nhits; + dst->ndalloc_flush += src->ndalloc_flush; + dst->ndalloc_noflush += src->ndalloc_noflush; + dst->noverfills += src->noverfills; +} + static inline void sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { dst->bytes += src->bytes; + sec_bin_stats_accum(&dst->total, &src->total); } /* A collections of free extents, all of the same size. */ typedef struct sec_bin_s sec_bin_t; struct sec_bin_s { /* - * When we fail to fulfill an allocation, we do a batch-alloc on the - * underlying allocator to fill extra items, as well. We drop the SEC - * lock while doing so, to allow operations on other bins to succeed. - * That introduces the possibility of other threads also trying to - * allocate out of this bin, failing, and also going to the backing - * allocator. 
To avoid a thundering herd problem in which lots of - * threads do batch allocs and overfill this bin as a result, we only - * allow one batch allocation at a time for a bin. This bool tracks - * whether or not some thread is already batch allocating. - * - * Eventually, the right answer may be a smarter sharding policy for the - * bins (e.g. a mutex per bin, which would also be more scalable - * generally; the batch-allocating thread could hold it while - * batch-allocating). + * Protects the data members of the bin. */ - bool being_batch_filled; + malloc_mutex_t mtx; /* - * Number of bytes in this particular bin (as opposed to the - * sec_shard_t's bytes_cur. This isn't user visible or reported in - * stats; rather, it allows us to quickly determine the change in the - * centralized counter when flushing. + * Number of bytes in this particular bin. */ size_t bytes_cur; edata_list_active_t freelist; -}; - -typedef struct sec_shard_s sec_shard_t; -struct sec_shard_s { - /* - * We don't keep per-bin mutexes, even though that would allow more - * sharding; this allows global cache-eviction, which in turn allows for - * better balancing across free lists. - */ - malloc_mutex_t mtx; - /* - * A SEC may need to be shut down (i.e. flushed of its contents and - * prevented from further caching). To avoid tricky synchronization - * issues, we just track enabled-status in each shard, guarded by a - * mutex. In practice, this is only ever checked during brief races, - * since the arena-level atomic boolean tracking HPA enabled-ness means - * that we won't go down these pathways very often after custom extent - * hooks are installed. - */ - bool enabled; - sec_bin_t *bins; - /* Number of bytes in all bins in the shard. */ - size_t bytes_cur; - /* The next pszind to flush in the flush-some pathways. 
*/ - pszind_t to_flush_next; + sec_bin_stats_t stats; }; typedef struct sec_s sec_t; struct sec_s { - pai_t pai; - pai_t *fallback; - - sec_opts_t opts; - sec_shard_t *shards; - pszind_t npsizes; + sec_opts_t opts; + sec_bin_t *bins; + pszind_t npsizes; }; -bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts); -void sec_flush(tsdn_t *tsdn, sec_t *sec); -void sec_disable(tsdn_t *tsdn, sec_t *sec); +static inline bool +sec_is_used(sec_t *sec) { + return sec->opts.nshards != 0; +} + +static inline bool +sec_size_supported(sec_t *sec, size_t size) { + return sec_is_used(sec) && size <= sec->opts.max_alloc; +} + +/* If sec does not have extent available, it will return NULL. */ +edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size); +void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, + edata_list_active_t *result, size_t nallocs); + +/* + * Upon return dalloc_list may be empty if edata is consumed by sec or non-empty + * if there are extents that need to be flushed from cache. Please note, that + * if we need to flush, extent(s) returned in the list to be deallocated + * will almost certainly not contain the one being dalloc-ed (that one will be + * considered "hot" and preserved in the cache, while "colder" ones are + * returned). 
+ */ +void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list); + +bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts); + +/* Fills to_flush with extents that need to be deallocated */ +void sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush); /* * Morally, these two stats methods probably ought to be a single one (and the diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index e0699d7a..039d423c 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -12,46 +12,39 @@ typedef struct sec_opts_s sec_opts_t; struct sec_opts_s { /* * We don't necessarily always use all the shards; requests are - * distributed across shards [0, nshards - 1). + * distributed across shards [0, nshards - 1). Once thread picks a + * shard it will always use that one. If this value is set to 0 sec is + * not used. */ size_t nshards; /* * We'll automatically refuse to cache any objects in this sec if - * they're larger than max_alloc bytes, instead forwarding such objects - * directly to the fallback. + * they're larger than max_alloc bytes. */ size_t max_alloc; /* - * Exceeding this amount of cached extents in a shard causes us to start - * flushing bins in that shard until we fall below bytes_after_flush. + * Exceeding this amount of cached extents in a bin causes us to flush + * until we are 1/4 below max_bytes. */ size_t max_bytes; - /* - * The number of bytes (in all bins) we flush down to when we exceed - * bytes_cur. We want this to be less than bytes_cur, because - * otherwise we could get into situations where a shard undergoing - * net-deallocation keeps bytes_cur very near to max_bytes, so that - * most deallocations get immediately forwarded to the underlying PAI - * implementation, defeating the point of the SEC. 
- */ - size_t bytes_after_flush; /* * When we can't satisfy an allocation out of the SEC because there are - * no available ones cached, we allocate multiple of that size out of - * the fallback allocator. Eventually we might want to do something - * cleverer, but for now we just grab a fixed number. + * no available ones cached, allocator will allocate a batch with extra + * batch_fill_extra extents of the same size. */ size_t batch_fill_extra; }; +#define SEC_OPTS_NSHARDS_DEFAULT 2 +#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3 +#define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024)) +#define SEC_OPTS_MAX_BYTES_DEFAULT \ + ((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + ? (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + : (256 * 1024)) + #define SEC_OPTS_DEFAULT \ - { \ - /* nshards */ \ - 4, /* max_alloc */ \ - (32 * 1024) < PAGE ? PAGE : (32 * 1024), /* max_bytes */ \ - 256 * 1024, /* bytes_after_flush */ \ - 128 * 1024, /* batch_fill_extra */ \ - 0 \ - } + {SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT, \ + SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT} #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 7ca3c347..0a426ff5 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -46,7 +46,7 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, - WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_SEC_BIN, WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index bfb62d78..1e8def75 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 
26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 037eb724..45ddf73d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index bd6595b1..f1a5158a 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 3f880176..a6f92ccf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 
@@ Source Files - - Source Files - Source Files diff --git a/src/arena.c b/src/arena.c index 664ed6a3..5b144c63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -89,7 +89,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { + hpa_shard_stats_t *hpastats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -159,7 +159,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, hpastats, secstats, &astats->resident); + estats, hpastats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -529,7 +529,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { * as possible", including flushing any caches (for situations * like thread death, or manual purge calls). 
*/ - sec_flush(tsdn, &arena->pa_shard.hpa_sec); + pa_shard_flush(tsdn, &arena->pa_shard); } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; diff --git a/src/ctl.c b/src/ctl.c index 553c58ad..1260e197 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -115,7 +115,6 @@ CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) -CTL_PROTO(opt_hpa_sec_bytes_after_flush) CTL_PROTO(opt_hpa_sec_batch_fill_extra) CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) @@ -339,6 +338,11 @@ CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) CTL_PROTO(stats_arenas_i_hpa_sec_bytes) +CTL_PROTO(stats_arenas_i_hpa_sec_hits) +CTL_PROTO(stats_arenas_i_hpa_sec_misses) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush) +CTL_PROTO(stats_arenas_i_hpa_sec_overfills) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -486,7 +490,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_bytes_after_flush"), CTL(opt_hpa_sec_bytes_after_flush)}, {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, @@ -826,6 +829,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, + {NAME("hpa_sec_hits"), CTL(stats_arenas_i_hpa_sec_hits)}, + {NAME("hpa_sec_misses"), CTL(stats_arenas_i_hpa_sec_misses)}, + {NAME("hpa_sec_dalloc_noflush"), + 
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)}, + {NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)}, + {NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -1066,7 +1075,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats, ctl_arena->astats->estats, - &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats); + &ctl_arena->astats->hpastats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1258,7 +1267,6 @@ ctl_arena_stats_sdmerge( /* Merge HPA stats. */ hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); - sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -2175,11 +2183,8 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) -CTL_RO_NL_GEN( - opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, size_t) CTL_RO_NL_GEN( opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) - CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN( opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) @@ -3869,7 +3874,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, - arenas_i(mib[2])->astats->secstats.bytes, size_t) + arenas_i(mib[2])->astats->hpastats.secstats.bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_hits, + arenas_i(mib[2])->astats->hpastats.secstats.total.nhits, size_t) 
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_misses, + arenas_i(mib[2])->astats->hpastats.secstats.total.nmisses, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_flush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_flush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_noflush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills, + arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) diff --git a/src/hpa.c b/src/hpa.c index cc330379..7e5b5f72 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -11,19 +11,17 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated); static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); -static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list, bool *deferred_work_generated); + const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; @@ -74,9 +72,9 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { } 
bool -hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts) { +hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -118,13 +116,16 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; - shard->pai.alloc_batch = &hpa_alloc_batch; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->pai.dalloc_batch = &hpa_dalloc_batch; shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work; + err = sec_init(tsdn, &shard->sec, base, sec_opts); + if (err) { + return true; + } + hpa_do_consistency_checks(shard); return false; @@ -151,6 +152,7 @@ hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); hpa_shard_nonderived_stats_accum( &dst->nonderived_stats, &src->nonderived_stats); + sec_stats_accum(&dst->secstats, &src->secstats); } void @@ -164,6 +166,8 @@ hpa_shard_stats_merge( hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + sec_stats_merge(tsdn, &shard->sec, &dst->secstats); } static bool @@ -825,37 +829,9 @@ hpa_from_pai(pai_t *self) { return (hpa_shard_t *)self; } -static size_t -hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - assert(nallocs > 0); - assert((size & PAGE_MASK) == 0); - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - 
hpa_shard_t *shard = hpa_from_pai(self); - - /* - * frequent_use here indicates this request comes from the arena bins, - * in which case it will be split into slabs, and therefore there is no - * intrinsic slack in the allocation (the entire range of allocated size - * will be accessed). - * - * In this case bypass the slab_max_alloc limit (if still within the - * huge page size). These requests do not concern internal - * fragmentation with huge pages (again, the full size will be used). - */ - if (!(frequent_reuse && size <= HUGEPAGE) - && (size > shard->opts.slab_max_alloc)) { - return 0; - } - - size_t nsuccess = hpa_alloc_batch_psset( - tsdn, shard, size, nallocs, results, deferred_work_generated); - - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - +static void +hpa_assert_results( + tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *results) { /* * Guard the sanity checks with config_debug because the loop cannot be * proven non-circular by the compiler, even if everything within the @@ -876,7 +852,6 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, assert(edata_base_get(edata) != NULL); } } - return nsuccess; } static edata_t * @@ -891,16 +866,52 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (alignment > PAGE || zero) { return NULL; } + hpa_shard_t *shard = hpa_from_pai(self); + /* - * An alloc with alignment == PAGE and zero == false is equivalent to a - * batch alloc of 1. Just do that, so we can share code. + * frequent_use here indicates this request comes from the arena bins, + * in which case it will be split into slabs, and therefore there is no + * intrinsic slack in the allocation (the entire range of allocated size + * will be accessed). + * + * In this case bypass the slab_max_alloc limit (if still within the + * huge page size). These requests do not concern internal + * fragmentation with huge pages (again, the full size will be used). 
*/ + if (!(frequent_reuse && size <= HUGEPAGE) + && (size > shard->opts.slab_max_alloc)) { + return NULL; + } + edata_t *edata = sec_alloc(tsdn, &shard->sec, size); + if (edata != NULL) { + return edata; + } + size_t nallocs = sec_size_supported(&shard->sec, size) + ? shard->sec.opts.batch_fill_extra + 1 + : 1; edata_list_active_t results; edata_list_active_init(&results); - size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results, frequent_reuse, deferred_work_generated); - assert(nallocs == 0 || nallocs == 1); - edata_t *edata = edata_list_active_first(&results); + size_t nsuccess = hpa_alloc_batch_psset( + tsdn, shard, size, nallocs, &results, deferred_work_generated); + hpa_assert_results(tsdn, shard, &results); + edata = edata_list_active_first(&results); + + if (edata != NULL) { + edata_list_active_remove(&results, edata); + assert(nsuccess > 0); + nsuccess--; + } + if (nsuccess > 0) { + assert(sec_size_supported(&shard->sec, size)); + sec_fill(tsdn, &shard->sec, size, &results, nsuccess); + /* Unlikely rollback in case of overfill */ + if (!edata_list_active_empty(&results)) { + hpa_dalloc_batch( + tsdn, self, &results, deferred_work_generated); + } + } + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); return edata; } @@ -996,10 +1007,19 @@ static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { assert(!edata_guarded_get(edata)); - /* Just a dalloc_batch of size 1; this lets us share logic. 
*/ + edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); + + hpa_shard_t *shard = hpa_from_pai(self); + sec_dalloc(tsdn, &shard->sec, &dalloc_list); + if (edata_list_active_empty(&dalloc_list)) { + /* sec consumed the pointer */ + *deferred_work_generated = false; + return; + } + /* We may have more than one pointer to flush now */ hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated); } @@ -1063,15 +1083,32 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return time_ns; } +static void +hpa_sec_flush_impl(tsdn_t *tsdn, hpa_shard_t *shard) { + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + + sec_flush(tsdn, &shard->sec, &to_flush); + bool deferred_work_generated; + hpa_dalloc_batch( + tsdn, (pai_t *)shard, &to_flush, &deferred_work_generated); +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_sec_flush_impl(tsdn, shard); malloc_mutex_lock(tsdn, &shard->mtx); edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_sec_flush_impl(tsdn, shard); +} + static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->npageslabs == 0); @@ -1093,6 +1130,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_shard_flush(tsdn, shard); /* * By the time we're here, the arena code should have dalloc'd all the * active extents, which means we should have eventually evicted @@ -1137,6 +1175,12 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + sec_prefork2(tsdn, &shard->sec); +} + void hpa_shard_prefork3(tsdn_t *tsdn, 
hpa_shard_t *shard) { hpa_do_consistency_checks(shard); @@ -1155,6 +1199,7 @@ void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_parent(tsdn, &shard->sec); malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->mtx); } @@ -1163,6 +1208,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_child(tsdn, &shard->sec); malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6844da5a..5d23962d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1013,6 +1013,15 @@ malloc_conf_error( /* However, tolerate experimental features. */ return; } + const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; + const size_t deprecated_cnt = (sizeof(deprecated) + / sizeof(deprecated[0])); + for (size_t i = 0; i < deprecated_cnt; ++i) { + if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { + /* Tolerate deprecated features. 
*/ + return; + } + } had_conf_error = true; } @@ -1685,7 +1694,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1694,13 +1702,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, - "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush, - "hpa_sec_bytes_after_flush", PAGE, 0, + "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES, + "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { diff --git a/src/pa.c b/src/pa.c index becf69b1..a03b0c1c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -67,12 +67,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { - if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, - shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { - return true; - } - if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, - hpa_sec_opts)) { + if (hpa_shard_init(tsdn, &shard->hpa_shard, &shard->central->hpa, + shard->emap, shard->base, &shard->edata_cache, shard->ind, + hpa_opts, hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; @@ -85,7 +82,6 @@ void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); if (shard->ever_used_hpa) { - sec_disable(tsdn, &shard->hpa_sec); hpa_shard_disable(tsdn, 
&shard->hpa_shard); } } @@ -93,8 +89,13 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); + pa_shard_flush(tsdn, shard); +} + +void +pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); + hpa_shard_flush(tsdn, &shard->hpa_shard); } } @@ -107,7 +108,6 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); hpa_shard_destroy(tsdn, &shard->hpa_shard); } } @@ -115,7 +115,7 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai - : &shard->hpa_sec.pai); + : &shard->hpa_shard.pai); } edata_t * @@ -128,7 +128,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (!guarded && pa_shard_uses_hpa(shard)) { - edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, zero, /* guarded */ false, slab, deferred_work_generated); } /* diff --git a/src/pa_extra.c b/src/pa_extra.c index 7c2498b7..ff45674f 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,7 +17,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_prefork2(tsdn, &shard->hpa_sec); + hpa_shard_prefork2(tsdn, &shard->hpa_shard); } } @@ -54,7 +54,6 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_parent(tsdn, &shard->hpa_sec); hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); } } @@ -69,7 +68,6 @@ 
pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_child(tsdn, &shard->hpa_sec); hpa_shard_postfork_child(tsdn, &shard->hpa_shard); } } @@ -104,8 +102,7 @@ pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -170,7 +167,6 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, if (shard->ever_used_hpa) { hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); - sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } @@ -204,7 +200,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa_shard.grow_mtx, arena_prof_mutex_hpa_shard_grow); - sec_mutex_stats_read(tsdn, &shard->hpa_sec, + sec_mutex_stats_read(tsdn, &shard->hpa_shard.sec, &mutex_prof_data[arena_prof_mutex_hpa_sec]); } } diff --git a/src/pac.c b/src/pac.c index 361816e9..86001139 100644 --- a/src/pac.c +++ b/src/pac.c @@ -97,11 +97,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); pac->pai.alloc = &pac_alloc_impl; - pac->pai.alloc_batch = &pai_alloc_batch_default; pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; - pac->pai.dalloc_batch = &pai_dalloc_batch_default; pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; return false; @@ -449,8 +447,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { size_t cur = 0, total_bytes = 0; for (edata_t *edata = 
edata_list_inactive_first(decay_extents); - edata != NULL; - edata = edata_list_inactive_next(decay_extents, edata)) { + edata != NULL; + edata = edata_list_inactive_next(decay_extents, edata)) { size_t pages_bytes = edata_size_get(edata); vec[cur].iov_base = edata_base_get(edata); vec[cur].iov_len = pages_bytes; @@ -511,7 +509,7 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, } for (edata_t *edata = edata_list_inactive_first(decay_extents); - edata != NULL; edata = edata_list_inactive_first(decay_extents)) { + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); diff --git a/src/pai.c b/src/pai.c deleted file mode 100644 index 3114e658..00000000 --- a/src/pai.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -size_t -pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - for (size_t i = 0; i < nallocs; i++) { - bool deferred_by_alloc = false; - edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, frequent_reuse, - &deferred_by_alloc); - *deferred_work_generated |= deferred_by_alloc; - if (edata == NULL) { - return i; - } - edata_list_active_append(results, edata); - } - return nallocs; -} - -void -pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - bool deferred_by_dalloc = false; - edata_list_active_remove(list, edata); - pai_dalloc(tsdn, self, edata, &deferred_by_dalloc); - *deferred_work_generated |= deferred_by_dalloc; - } -} diff --git a/src/sec.c b/src/sec.c index c827dd5c..5f65362f 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,95 +4,56 @@ #include 
"jemalloc/internal/sec.h" #include "jemalloc/internal/jemalloc_probe.h" -static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated); -static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - -static void +static bool sec_bin_init(sec_bin_t *bin) { - bin->being_batch_filled = false; bin->bytes_cur = 0; + sec_bin_stats_init(&bin->stats); edata_list_active_init(&bin->freelist); + bool err = malloc_mutex_init(&bin->mtx, "sec_bin", WITNESS_RANK_SEC_BIN, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + return false; } bool -sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts) { +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) { + sec->opts = *opts; + if (opts->nshards == 0) { + return false; + } assert(opts->max_alloc >= PAGE); + /* * Same as tcache, sec do not cache allocs/dallocs larger than * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_large_size_classes_disabled() - || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); + assert(opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; - size_t sz_shards = opts->nshards * sizeof(sec_shard_t); - size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); - size_t sz_alloc = sz_shards + sz_bins; - void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + size_t ntotal_bins = opts->nshards * (size_t)npsizes; + size_t sz_bins = sizeof(sec_bin_t) * ntotal_bins; + void *dynalloc = base_alloc(tsdn, base, sz_bins, CACHELINE); if (dynalloc == NULL) { return true; } - sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; - sec->shards = shard_cur; - sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; - /* Just for asserts, below. */ - sec_bin_t *bin_start = bin_cur; - - for (size_t i = 0; i < opts->nshards; i++) { - sec_shard_t *shard = shard_cur; - shard_cur++; - bool err = malloc_mutex_init(&shard->mtx, "sec_shard", - WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); - if (err) { + sec->bins = (sec_bin_t *)dynalloc; + for (pszind_t j = 0; j < ntotal_bins; j++) { + if (sec_bin_init(&sec->bins[j])) { return true; } - shard->enabled = true; - shard->bins = bin_cur; - for (pszind_t j = 0; j < npsizes; j++) { - sec_bin_init(&shard->bins[j]); - bin_cur++; - } - shard->bytes_cur = 0; - shard->to_flush_next = 0; } - /* - * Should have exactly matched the bin_start to the first unused byte - * after the shards. - */ - assert((void *)shard_cur == (void *)bin_start); - /* And the last bin to use up the last bytes of the allocation. */ - assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); - sec->fallback = fallback; - - sec->opts = *opts; sec->npsizes = npsizes; - /* - * Initialize these last so that an improper use of an SEC whose - * initialization failed will segfault in an easy-to-spot way. 
- */ - sec->pai.alloc = &sec_alloc; - sec->pai.alloc_batch = &pai_alloc_batch_default; - sec->pai.expand = &sec_expand; - sec->pai.shrink = &sec_shrink; - sec->pai.dalloc = &sec_dalloc; - sec->pai.dalloc_batch = &pai_dalloc_batch_default; - return false; } -static sec_shard_t * +static uint8_t sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { /* * Eventually, we should implement affinity, tracking source shard using @@ -100,7 +61,7 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * distribute across all shards. */ if (tsdn_null(tsdn)) { - return &sec->shards[0]; + return 0; } tsd_t *tsd = tsdn_tsd(tsdn); uint8_t *idxp = tsd_sec_shardp_get(tsd); @@ -118,284 +79,252 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } - return &sec->shards[*idxp]; + return *idxp; } -/* - * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an - * empty cache, we'll try to fill it, which can push the shard over it's limit. - */ -static void -sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->opts.bytes_after_flush) { - /* Pick a victim. */ - sec_bin_t *bin = &shard->bins[shard->to_flush_next]; - - /* Update our victim-picking state. */ - shard->to_flush_next++; - if (shard->to_flush_next == sec->npsizes) { - shard->to_flush_next = 0; - } - - assert(shard->bytes_cur >= bin->bytes_cur); - if (bin->bytes_cur != 0) { - shard->bytes_cur -= bin->bytes_cur; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } - /* - * Either bin->bytes_cur was 0, in which case we didn't touch - * the bin list but it should be empty anyways (or else we - * missed a bytes_cur update on a list modification), or it - * *was* 0 and we emptied it ourselves. Either way, it should - * be empty now. 
- */ - assert(edata_list_active_empty(&bin->freelist)); - } - - malloc_mutex_unlock(tsdn, &shard->mtx); - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); +static sec_bin_t * +sec_bin_pick(sec_t *sec, uint8_t shard, pszind_t pszind) { + assert(shard < sec->opts.nshards); + size_t ind = (size_t)shard * sec->npsizes + pszind; + assert(ind < sec->npsizes * sec->opts.nshards); + return &sec->bins[ind]; } static edata_t * -sec_shard_alloc_locked( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, sec_bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (!shard->enabled) { - return NULL; - } +sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); + edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { + assert(!edata_list_active_empty(&bin->freelist)); edata_list_active_remove(&bin->freelist, edata); - assert(edata_size_get(edata) <= bin->bytes_cur); - bin->bytes_cur -= edata_size_get(edata); - assert(edata_size_get(edata) <= shard->bytes_cur); - shard->bytes_cur -= edata_size_get(edata); + size_t sz = edata_size_get(edata); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + bin->stats.nhits++; } return edata; } static edata_t * -sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin, size_t size, bool frequent_reuse) { - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); +sec_multishard_trylock_alloc( + tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) { + assert(sec->opts.nshards > 0); - edata_list_active_t result; - edata_list_active_init(&result); - bool deferred_work_generated = false; - size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, - &deferred_work_generated); - - edata_t *ret = edata_list_active_first(&result); - if (ret != NULL) { - edata_list_active_remove(&result, 
ret); + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin; + for (size_t i = 0; i < sec->opts.nshards; ++i) { + bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + edata_t *edata = sec_bin_alloc_locked( + tsdn, sec, bin, size); + malloc_mutex_unlock(tsdn, &bin->mtx); + if (edata != NULL) { + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; + } + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } } - - malloc_mutex_lock(tsdn, &shard->mtx); - bin->being_batch_filled = false; - /* - * Handle the easy case first: nothing to cache. Note that this can - * only happen in case of OOM, since sec_alloc checks the expected - * number of allocs, and doesn't bother going down the batch_fill - * pathway if there won't be anything left to cache. So to be in this - * code path, we must have asked for > 1 alloc, but only gotten 1 back. - */ - if (nalloc <= 1) { - malloc_mutex_unlock(tsdn, &shard->mtx); - return ret; + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + /* Only now we know it is a miss */ + bin->stats.nmisses++; } - - size_t new_cached_bytes = (nalloc - 1) * size; - - edata_list_active_concat(&bin->freelist, &result); - bin->bytes_cur += new_cached_bytes; - shard->bytes_cur += new_cached_bytes; - - if (shard->bytes_cur > sec->opts.max_bytes) { - sec_flush_some_and_unlock(tsdn, sec, shard); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } - - return ret; + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, /* frequent_reuse */ 1); + return edata; } -static edata_t * -sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool frequent_reuse, bool 
*deferred_work_generated) { +edata_t * +sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { + if (!sec_size_supported(sec, size)) { + return NULL; + } assert((size & PAGE_MASK) == 0); - assert(!guarded); - - sec_t *sec = (sec_t *)self; - - if (zero || alignment > PAGE || sec->opts.nshards == 0 - || size > sec->opts.max_alloc) { - return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - /* guarded */ false, frequent_reuse, - deferred_work_generated); - } pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - sec_bin_t *bin = &shard->bins[pszind]; - bool do_batch_fill = false; - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); - if (edata == NULL) { - if (!bin->being_batch_filled - && sec->opts.batch_fill_extra > 0) { - bin->being_batch_filled = true; - do_batch_fill = true; + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. 
+ */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + bin->stats.nmisses++; } + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; } - malloc_mutex_unlock(tsdn, &shard->mtx); - if (edata == NULL) { - if (do_batch_fill) { - edata = sec_batch_fill_and_alloc( - tsdn, sec, shard, bin, size, frequent_reuse); - } else { - edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, /* guarded */ false, frequent_reuse, - deferred_work_generated); - } - } - JE_USDT(sec_alloc, 5, sec, shard, edata, size, frequent_reuse); - return edata; -} - -static bool -sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_expand, 4, sec, edata, old_size, new_size); - return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, - deferred_work_generated); -} - -static bool -sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_shrink, 4, sec, edata, old_size, new_size); - return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, - deferred_work_generated); + return sec_multishard_trylock_alloc(tsdn, sec, size, pszind); } static void -sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - shard->bytes_cur = 0; - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - for (pszind_t i = 0; i < sec->npsizes; i++) { - sec_bin_t *bin = &shard->bins[i]; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } +sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size, + 
edata_list_active_t *dalloc_list) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); - /* - * Ordinarily we would try to avoid doing the batch deallocation while - * holding the shard mutex, but the flush_all pathways only happen when - * we're disabling the HPA or resetting the arena, both of which are - * rare pathways. - */ - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); -} - -static void -sec_shard_dalloc_and_unlock( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - assert(shard->bytes_cur <= sec->opts.max_bytes); - size_t size = edata_size_get(edata); - pszind_t pszind = sz_psz2ind(size); - assert(pszind < sec->npsizes); - /* - * Prepending here results in LIFO allocation per bin, which seems - * reasonable. - */ - sec_bin_t *bin = &shard->bins[pszind]; - edata_list_active_prepend(&bin->freelist, edata); bin->bytes_cur += size; - shard->bytes_cur += size; - if (shard->bytes_cur > sec->opts.max_bytes) { - /* - * We've exceeded the shard limit. We make two nods in the - * direction of fragmentation avoidance: we flush everything in - * the shard, rather than one particular bin, and we hold the - * lock while flushing (in case one of the extents we flush is - * highly preferred from a fragmentation-avoidance perspective - * in the backing allocator). This has the extra advantage of - * not requiring advanced cache balancing strategies. 
- */ - sec_flush_some_and_unlock(tsdn, sec, shard); - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } -} + edata_t *edata = edata_list_active_first(dalloc_list); + assert(edata != NULL); + edata_list_active_remove(dalloc_list, edata); + JE_USDT(sec_dalloc, 3, sec, bin, edata); + edata_list_active_prepend(&bin->freelist, edata); + /* Single extent can be returned to SEC */ + assert(edata_list_active_empty(dalloc_list)); -static void -sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - if (sec->opts.nshards == 0 - || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + if (bin->bytes_cur <= sec->opts.max_bytes) { + bin->stats.ndalloc_noflush++; return; } - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - JE_USDT(sec_dalloc, 3, sec, shard, edata); - malloc_mutex_lock(tsdn, &shard->mtx); - if (shard->enabled) { - sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); + bin->stats.ndalloc_flush++; + /* we want to flush 1/4 of max_bytes */ + size_t bytes_target = sec->opts.max_bytes - (sec->opts.max_bytes >> 2); + while (bin->bytes_cur > bytes_target + && !edata_list_active_empty(&bin->freelist)) { + edata_t *cur = edata_list_active_last(&bin->freelist); + size_t sz = edata_size_get(cur); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + edata_list_active_remove(&bin->freelist, cur); + edata_list_active_append(dalloc_list, cur); + } +} + +static void +sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, + pszind_t pszind, edata_list_active_t *dalloc_list) { + assert(sec->opts.nshards > 0); + + /* Try to dalloc in this threads bin first */ + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + for (size_t i = 0; i < sec->opts.nshards; ++i) { + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + 
sec_bin_dalloc_locked( + tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } + } + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); +} + +void +sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) { + if (!sec_is_used(sec)) { + return; + } + edata_t *edata = edata_list_active_first(dalloc_list); + size_t size = edata_size_get(edata); + if (size > sec->opts.max_alloc) { + return; + } + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. + */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list); +} + +void +sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result, + size_t nallocs) { + assert((size & PAGE_MASK) == 0); + assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc); + assert(nallocs > 0); + + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind); + malloc_mutex_assert_not_owner(tsdn, &bin->mtx); + malloc_mutex_lock(tsdn, &bin->mtx); + size_t new_cached_bytes = nallocs * size; + if (bin->bytes_cur + new_cached_bytes <= sec->opts.max_bytes) { + assert(!edata_list_active_empty(result)); + edata_list_active_concat(&bin->freelist, result); + bin->bytes_cur += new_cached_bytes; } else { - 
malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + /* + * Unlikely case of many threads filling at the same time and + * going above max. + */ + bin->stats.noverfills++; + while (bin->bytes_cur + size <= sec->opts.max_bytes) { + edata_t *edata = edata_list_active_first(result); + if (edata == NULL) { + break; + } + edata_list_active_remove(result, edata); + assert(size == edata_size_get(edata)); + edata_list_active_append(&bin->freelist, edata); + bin->bytes_cur += size; + } } + malloc_mutex_unlock(tsdn, &bin->mtx); } void -sec_flush(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); +sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush) { + if (!sec_is_used(sec)) { + return; } -} - -void -sec_disable(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec->shards[i].enabled = false; - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + bin->bytes_cur = 0; + edata_list_active_concat(to_flush, &bin->freelist); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + if (!sec_is_used(sec)) { + return; + } size_t sum = 0; - for (size_t i = 0; i < sec->opts.nshards; i++) { - /* - * We could save these lock acquisitions by making bytes_cur - * atomic, but stats collection is rare anyways and we expect - * the number and type of stats to get more interesting. 
- */ - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sum += sec->shards[i].bytes_cur; - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + sum += bin->bytes_cur; + sec_bin_stats_accum(&stats->total, &bin->stats); + malloc_mutex_unlock(tsdn, &bin->mtx); } stats->bytes += sum; } @@ -403,31 +332,50 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { void sec_mutex_stats_read( tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - malloc_mutex_prof_accum( - tsdn, mutex_prof_data, &sec->shards[i].mtx); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, &bin->mtx); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_prefork2(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_prefork(tsdn, &bin->mtx); } } void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_parent(tsdn, &bin->mtx); } } void sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { - 
for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_child(tsdn, &bin->mtx); } } diff --git a/src/stats.c b/src/stats.c index 2ccac6c9..be70a6fc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -791,9 +791,35 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { size_t sec_bytes; + size_t sec_hits; + size_t sec_misses; + size_t sec_dalloc_flush; + size_t sec_dalloc_noflush; + size_t sec_overfills; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", emitter_type_size, &sec_bytes); + CTL_M2_GET("stats.arenas.0.hpa_sec_hits", i, &sec_hits, size_t); + emitter_kv(emitter, "sec_hits", "Total hits in small extent cache", + emitter_type_size, &sec_hits); + CTL_M2_GET("stats.arenas.0.hpa_sec_misses", i, &sec_misses, size_t); + emitter_kv(emitter, "sec_misses", "Total misses in small extent cache", + emitter_type_size, &sec_misses); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_noflush", i, + &sec_dalloc_noflush, size_t); + emitter_kv(emitter, "sec_dalloc_noflush", + "Dalloc calls without flush in small extent cache", + emitter_type_size, &sec_dalloc_noflush); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_flush", i, &sec_dalloc_flush, + size_t); + emitter_kv(emitter, "sec_dalloc_flush", + "Dalloc calls with flush in small extent cache", emitter_type_size, + &sec_dalloc_flush); + CTL_M2_GET( + "stats.arenas.0.hpa_sec_overfills", i, &sec_overfills, size_t); + emitter_kv(emitter, "sec_overfills", + "sec_fill calls that went over max_bytes", emitter_type_size, + &sec_overfills); } static void @@ -1642,7 +1668,6 @@ stats_general_print(emitter_t *emitter) { 
OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") - OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 5937601e..9c4253cd 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -113,10 +113,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -309,83 +311,6 @@ TEST_BEGIN(test_stress) { } TEST_END -static void -expect_contiguous(edata_t **edatas, size_t nedatas) { - for (size_t i = 0; i < nedatas; i++) { - size_t expected = (size_t)edata_base_get(edatas[0]) + i * PAGE; - expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), - "Mismatch at index %zu", i); - } -} - -TEST_BEGIN(test_alloc_dalloc_batch) { - test_skip_if(!hpa_supported()); - - hpa_shard_t *shard = create_test_data( - &hpa_hooks_default, &test_hpa_shard_opts_default); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - bool deferred_work_generated = false; - - enum { NALLOCS = 8 }; - - edata_t *allocs[NALLOCS]; - /* - * Allocate a mix of ways; first half from regular alloc, second half - * from alloc_batch. 
- */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - edata_list_active_t allocs_list; - edata_list_active_init(&allocs_list); - size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list, /* frequent_reuse */ false, &deferred_work_generated); - expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - allocs[i] = edata_list_active_first(&allocs_list); - edata_list_active_remove(&allocs_list, allocs[i]); - } - - /* - * Should have allocated them contiguously, despite the differing - * methods used. - */ - void *orig_base = edata_base_get(allocs[0]); - expect_contiguous(allocs, NALLOCS); - - /* - * Batch dalloc the first half, individually deallocate the second half. - */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - edata_list_active_append(&allocs_list, allocs[i]); - } - pai_dalloc_batch( - tsdn, &shard->pai, &allocs_list, &deferred_work_generated); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc( - tsdn, &shard->pai, allocs[i], &deferred_work_generated); - } - - /* Reallocate (individually), and ensure reuse and contiguity. 
*/ - for (size_t i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); - } - void *new_base = edata_base_get(allocs[0]); - expect_ptr_eq( - orig_base, new_base, "Failed to reuse the allocated memory."); - expect_contiguous(allocs, NALLOCS); - - destroy_test_data(shard); -} -TEST_END - static uintptr_t defer_bump_ptr = HUGEPAGE * 123; static void * defer_test_map(size_t size) { @@ -1533,8 +1458,7 @@ main(void) { (void)mem_tree_iter; (void)mem_tree_reverse_iter; (void)mem_tree_destroy; - return test_no_reentrancy(test_alloc_max, test_stress, - test_alloc_dalloc_batch, test_defer_time, + return test_no_reentrancy(test_alloc_max, test_stress, test_defer_time, test_purge_no_infinite_loop, test_no_min_purge_interval, test_min_purge_interval, test_purge, test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero, diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c new file mode 100644 index 00000000..c54cdc0c --- /dev/null +++ b/test/unit/hpa_sec_integration.c @@ -0,0 +1,239 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(10), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* 
experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_lazy}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts, + const sec_opts_t *sec_opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts, sec_opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return 
false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +// test that freed pages stay in SEC and hpa thinks they are active + +TEST_BEGIN(test_hpa_sec) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts; + + enum { NALLOCS = 8 }; + sec_opts_t sec_opts; + sec_opts.nshards = 1; + sec_opts.max_alloc = 2 * PAGE; + sec_opts.max_bytes = NALLOCS * PAGE; + sec_opts.batch_fill_extra = 4; + + hpa_shard_t *shard = create_test_data(&hooks, &opts, &sec_opts); + bool deferred_work_generated = false; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* alloc 1 PAGE, confirm sec has fill_extra bytes. 
*/ + edata_t *edata1 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata1, "Unexpected null edata"); + hpa_shard_stats_t hpa_stats; + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, + 1 + sec_opts.batch_fill_extra, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE * sec_opts.batch_fill_extra, + "sec should have fill extra pages"); + + /* Alloc/dealloc NALLOCS times and confirm extents are in sec. */ + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, 2 + NALLOCS, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE, "2 refills (at 0 and 4)"); + + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (2 + NALLOCS), ""); + expect_zu_eq( + hpa_stats.secstats.bytes, sec_opts.max_bytes, "sec should be full"); + + /* this one should flush 1 + 0.25 * 8 = 3 extents */ + pai_dalloc( + tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (NALLOCS - 1), ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, + "sec should be full"); + + /* Next allocation should come from SEC and not increase active */ + edata_t *edata2 
= pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes - PAGE, + "sec should have max_bytes minus one page that just came from it"); + + /* We return this one and it stays in the cache */ + pai_dalloc(tsdn, &shard->pai, edata2, &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, ""); + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_hpa_sec); +} diff --git a/test/unit/hpa_sec_integration.sh b/test/unit/hpa_sec_integration.sh new file mode 100644 index 00000000..22451f1d --- /dev/null +++ b/test/unit/hpa_sec_integration.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c index 29c86cdd..6e56e663 100644 --- a/test/unit/hpa_thp_always.c +++ b/test/unit/hpa_thp_always.c @@ -65,10 +65,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); 
assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index e82f0ffb..2121de49 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -66,9 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index d542f72a..e92988de 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -66,10 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f409f687..4c11e485 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -313,7 +313,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); - 
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); diff --git a/test/unit/sec.c b/test/unit/sec.c index d57c66ec..2a6a00ce 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -2,618 +2,493 @@ #include "jemalloc/internal/sec.h" -typedef struct pai_test_allocator_s pai_test_allocator_t; -struct pai_test_allocator_s { - pai_t pai; - bool alloc_fail; - size_t alloc_count; - size_t alloc_batch_count; - size_t dalloc_count; - size_t dalloc_batch_count; +typedef struct test_data_s test_data_t; +struct test_data_s { /* - * We use a simple bump allocator as the implementation. This isn't - * *really* correct, since we may allow expansion into a subsequent - * allocation, but it's not like the SEC is really examining the - * pointers it gets back; this is mostly just helpful for debugging. + * Must be the first member -- we convert back and forth between the + * test_data_t and the sec_t; */ - uintptr_t next_ptr; - size_t expand_count; - bool expand_return_value; - size_t shrink_count; - bool shrink_return_value; + sec_t sec; + base_t *base; }; static void -test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, - size_t max_bytes) { - sec_opts_t opts; - opts.nshards = 1; - opts.max_alloc = max_alloc; - opts.max_bytes = max_bytes; - /* - * Just choose reasonable defaults for these; most tests don't care so - * long as they're something reasonable. - */ - opts.bytes_after_flush = max_bytes / 2; - opts.batch_fill_extra = 4; - - /* - * We end up leaking this base, but that's fine; this test is - * short-running, and SECs are arena-scoped in reality. 
- */ - base_t *base = base_new(TSDN_NULL, /* ind */ 123, +test_data_init(tsdn_t *tsdn, test_data_t *tdata, const sec_opts_t *opts) { + tdata->base = base_new(TSDN_NULL, /* ind */ 123, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); + bool err = sec_init(tsdn, &tdata->sec, tdata->base, opts); assert_false(err, "Unexpected initialization failure"); - assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching"); -} - -static inline edata_t * -pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated) { - assert(!guarded); - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return NULL; + if (tdata->sec.opts.nshards > 0) { + assert_u_ge(tdata->sec.npsizes, 0, + "Zero size classes allowed for caching"); } - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - ta->next_ptr += alignment - 1; - edata_init(edata, /* arena_ind */ 0, - (void *)(ta->next_ptr & ~(alignment - 1)), size, - /* slab */ false, - /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, - /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += size; - ta->alloc_count++; - return edata; -} - -static inline size_t -pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return 0; - } - for (size_t i = 0; i < nallocs; i++) { - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - edata_init(edata, /* arena_ind */ 0, (void *)ta->next_ptr, size, - /* slab */ false, /* szind */ 0, /* sn */ 1, - extent_state_active, /* zero */ false, /* comitted */ true, - /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += 
size; - ta->alloc_batch_count++; - edata_list_active_append(results, edata); - } - return nallocs; -} - -static bool -pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->expand_count++; - return ta->expand_return_value; -} - -static bool -pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->shrink_count++; - return ta->shrink_return_value; } static void -pai_test_allocator_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->dalloc_count++; - free(edata); +destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) { + /* There is no destroy sec to delete the bins ?! */ + base_delete(tsdn, tdata->base); } -static void -pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - edata_list_active_remove(list, edata); - ta->dalloc_batch_count++; - free(edata); - } -} - -static inline void -pai_test_allocator_init(pai_test_allocator_t *ta) { - ta->alloc_fail = false; - ta->alloc_count = 0; - ta->alloc_batch_count = 0; - ta->dalloc_count = 0; - ta->dalloc_batch_count = 0; - /* Just don't start the edata at 0. 
*/ - ta->next_ptr = 10 * PAGE; - ta->expand_count = 0; - ta->expand_return_value = false; - ta->shrink_count = 0; - ta->shrink_return_value = false; - ta->pai.alloc = &pai_test_allocator_alloc; - ta->pai.alloc_batch = &pai_test_allocator_alloc_batch; - ta->pai.expand = &pai_test_allocator_expand; - ta->pai.shrink = &pai_test_allocator_shrink; - ta->pai.dalloc = &pai_test_allocator_dalloc; - ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch; -} - -TEST_BEGIN(test_reuse) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* - * We can't use the "real" tsd, since we malloc within the test - * allocator hooks; we'd get lock inversion crashes. Eventually, we - * should have a way to mock tsds, but for now just don't do any - * lock-order checking. - */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 33 pages in the cache before triggering a flush. We - * set the flush liimt to twice this amount, to avoid accidentally - * triggering a flush caused by the batch-allocation down the cache fill - * pathway disrupting ordering. 
- */ - enum { NALLOCS = 11 }; - edata_t *one_page[NALLOCS]; - edata_t *two_page[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, - /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); - for (int i = 0; i < NALLOCS; i++) { - one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - } - expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - 2 * NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free in a different order than we allocated, to make sure free-list - * separation works correctly. - */ - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, one_page[i], &deferred_work_generated); - } - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, two_page[i], &deferred_work_generated); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Check that the n'th most recent deallocated extent is returned for - * the n'th alloc request of a given size. 
- */ - for (int i = 0; i < NALLOCS; i++) { - edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); - expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); -} -TEST_END - -TEST_BEGIN(test_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. The - * choice of NALLOCS here is chosen to match the batch allocation - * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly - * empty, even in the presence of batch allocation on fill). - * Eventually, once our allocation batching strategies become smarter, - * this should change. 
- */ - enum { NALLOCS = 10 }; - edata_t *extra_alloc; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* Free until the SEC is full, but should not have flushed yet. */ - for (int i = 0; i < NALLOCS; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free the extra allocation; this should trigger a flush. The internal - * flushing logic is allowed to get complicated; for now, we rely on our - * whitebox knowledge of the fact that the SEC flushes bins in their - * entirety when it decides to do so, and it has only one bin active - * right now. 
- */ - pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated); - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} -TEST_END - -/* - * A disable and a flush are *almost* equivalent; the only difference is what - * happens afterwards; disabling disallows all future caching as well. - */ -static void -do_disable_flush_test(bool is_disable) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { NALLOCS = 11 }; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - /* Free all but the last aloc. 
*/ - for (int i = 0; i < NALLOCS - 1; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - - expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - - if (is_disable) { - sec_disable(tsdn, &sec); - } else { - sec_flush(tsdn, &sec); - } - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); - size_t old_dalloc_batch_count = ta.dalloc_batch_count; - - /* - * If we free into a disabled SEC, it should forward to the fallback. - * Otherwise, the SEC should accept the allocation. - */ - pai_dalloc( - tsdn, &sec.pai, allocs[NALLOCS - 1], &deferred_work_generated); - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} - -TEST_BEGIN(test_disable) { - do_disable_flush_test(/* is_disable */ true); -} -TEST_END - -TEST_BEGIN(test_flush) { - do_disable_flush_test(/* is_disable */ false); -} -TEST_END - -TEST_BEGIN(test_max_alloc_respected) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - size_t max_alloc = 2 * PAGE; - size_t attempted_alloc = 3 * PAGE; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, - /* max_bytes */ 1000 * PAGE); - - for (size_t i = 0; i < 100; i++) { - expect_zu_eq( - i, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - expect_zu_eq( - i + 1, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); - } -} -TEST_END - -TEST_BEGIN(test_expand_shrink_delegate) { - /* - * Expand and shrink shouldn't affect sec state; they should just - * delegate to the fallback PAI. - */ - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, - /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, - /* max_bytes */ 1000 * PAGE); - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - - bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_false(err, "Unexpected expand failure"); - expect_zu_eq(1, ta.expand_count, ""); - ta.expand_return_value = true; - err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_true(err, "Unexpected expand success"); - expect_zu_eq(2, ta.expand_count, ""); - - err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE, - &deferred_work_generated); - expect_false(err, "Unexpected shrink failure"); - expect_zu_eq(1, ta.shrink_count, ""); - ta.shrink_return_value = true; - err = pai_shrink( - tsdn, &sec.pai, edata, 2 * PAGE, PAGE, &deferred_work_generated); - expect_true(err, "Unexpected shrink success"); - expect_zu_eq(2, ta.shrink_count, ""); -} -TEST_END - -TEST_BEGIN(test_nshards_0) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - - sec_opts_t opts = SEC_OPTS_DEFAULT; +TEST_BEGIN(test_max_nshards_option_zero) { + test_data_t tdata; + sec_opts_t opts; opts.nshards = 0; - sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); + opts.max_alloc = PAGE; + opts.max_bytes = 512 * PAGE; - bool deferred_work_generated = false; - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); - /* Both operations should have gone directly to the fallback. */ - expect_zu_eq(1, ta.alloc_count, ""); - expect_zu_eq(1, ta.dalloc_count, ""); + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be disabled when nshards==0"); + destroy_test_data(tsdn, &tdata); } TEST_END +TEST_BEGIN(test_max_alloc_option_too_small) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 512 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE); + expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_fill) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 2; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Fill the cache with two extents */ + sec_stats_t stats = {0}; + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, 
&edata1); + edata_list_active_append(&allocs, &edata2); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled"); + expect_true(edata_list_active_empty(&allocs), + "extents should be consumed by sec"); + + /* Try to overfill and confirm that max_bytes is respected. */ + stats.bytes = 0; + edata_t edata5, edata4, edata3; + edata_size_set(&edata3, PAGE); + edata_size_set(&edata4, PAGE); + edata_size_set(&edata5, PAGE); + edata_list_active_append(&allocs, &edata3); + edata_list_active_append(&allocs, &edata4); + edata_list_active_append(&allocs, &edata5); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes"); + expect_false(edata_list_active_empty(&allocs), "Not all should fit"); + expect_zu_eq(stats.total.noverfills, 1, "Expected one overfill"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_alloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 1; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Alloc from empty cache returns NULL */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + + /* Place two extents into the sec */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + 
expect_zu_eq(stats.bytes, 2 * PAGE, + "After fill bytes should reflect what is in the cache"); + stats.bytes = 0; + + /* Most recently cached extent should be used on alloc */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata2, "edata2 is most recently used"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache"); + stats.bytes = 0; + + /* Alloc can still get extents from cache */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata1, "SEC is not empty"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "No more items after last one is popped"); + + /* And cache is empty again */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_dalloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + /* Return one more extent, so that we are at the limit */ + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more as well and we will be exactly at max_bytes */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + stats.bytes = 0; + + /* + * 
We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush to 3/4 of max_bytes and given that + * we have max of 2 pages, we will have to flush two. We will not flush + * the one given in the input as it is the most recently used. + */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + expect_ptr_ne( + edata_list_active_first(&allocs), &edata3, "edata3 is MRU"); + expect_ptr_ne( + edata_list_active_last(&allocs), &edata3, "edata3 is MRU"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_max_bytes_too_low) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache. 
Item is too big */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, 3 * PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should not be consumed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_flush) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 1024 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* We put in 10 one-page extents, and 10 four-page extents */ + edata_list_active_t allocs1; + edata_list_active_t allocs4; + edata_list_active_init(&allocs1); + edata_list_active_init(&allocs4); + enum { NALLOCS = 10 }; + edata_t edata1[NALLOCS]; + edata_t edata4[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edata_size_set(&edata1[i], PAGE); + edata_size_set(&edata4[i], 4 * PAGE); + + edata_list_active_append(&allocs1, &edata1[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs1); + edata_list_active_append(&allocs4, &edata4[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs4); + } + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, 10 * 5 * PAGE, "SEC should have what we filled"); + stats.bytes = 0; + + expect_true(edata_list_active_empty(&allocs1), ""); + sec_flush(tsdn, &tdata.sec, &allocs1); + expect_false(edata_list_active_empty(&allocs1), ""); + + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "SEC should be empty"); + stats.bytes = 0; + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_stats) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_list_active_t allocs; + 
edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty alloc fails. nmisses==1 */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be empty"); + + /* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more, so ndalloc_noflush=2 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + expect_zu_eq(stats.total.ndalloc_noflush, 2, ""); + expect_zu_eq(stats.total.nmisses, 1, ""); + + memset(&stats, 0, sizeof(sec_stats_t)); + + /* + * We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush, so ndalloc_flush = 1. 
+ */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + expect_zu_eq(stats.total.ndalloc_flush, 1, ""); + memset(&stats, 0, sizeof(sec_stats_t)); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +#define NOPS_PER_THREAD 100 +#define NPREFILL 32 + static void -expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { - sec_stats_t stats; +edata_init_test(edata_t *edata) { + memset(edata, 0, sizeof(*edata)); +} + +typedef struct { + sec_t *sec; + uint8_t preferred_shard; + size_t nallocs; + size_t nallocs_fail; + size_t ndallocs; + size_t ndallocs_fail; + edata_list_active_t fill_list; + size_t fill_list_sz; + edata_t *edata[NOPS_PER_THREAD]; +} trylock_test_arg_t; + +static void * +thd_trylock_test(void *varg) { + trylock_test_arg_t *arg = (trylock_test_arg_t *)varg; + tsd_t *tsd = tsd_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd); + + /* Set the preferred shard for this thread */ + uint8_t *shard_idx = tsd_sec_shardp_get(tsd); + *shard_idx = arg->preferred_shard; + + /* Fill the shard with some extents */ + sec_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz); + expect_true(edata_list_active_empty(&arg->fill_list), ""); + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + /* Try to allocate from SEC */ + arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE); + if (arg->edata[i] != NULL) { + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + } + } + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + if (arg->edata[i] != NULL) { + edata_list_active_t list; + edata_list_active_init(&list); + arg->nallocs++; + edata_list_active_append(&list, arg->edata[i]); + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + sec_dalloc(tsdn, arg->sec, &list); + if 
(edata_list_active_empty(&list)) { + arg->ndallocs++; + } else { + arg->ndallocs_fail++; + } + } else { + arg->nallocs_fail++; + } + } + + return NULL; +} + +TEST_BEGIN(test_sec_multishard) { + test_data_t tdata; + sec_opts_t opts; + enum { NSHARDS = 2 }; + enum { NTHREADS = NSHARDS * 16 }; + opts.nshards = NSHARDS; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 64 * NTHREADS * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Create threads with different preferred shards */ + thd_t thds[NTHREADS]; + trylock_test_arg_t args[NTHREADS]; + + edata_t all_edatas[NPREFILL * NTHREADS]; + + for (unsigned i = 0; i < NTHREADS; i++) { + edata_list_active_init(&args[i].fill_list); + for (unsigned j = 0; j < NPREFILL; ++j) { + size_t ind = i * NPREFILL + j; + edata_init_test(&all_edatas[ind]); + edata_size_set(&all_edatas[ind], PAGE); + edata_list_active_append( + &args[i].fill_list, &all_edatas[ind]); + } + args[i].fill_list_sz = NPREFILL; + args[i].sec = &tdata.sec; + args[i].preferred_shard = i % opts.nshards; + args[i].nallocs = 0; + args[i].nallocs_fail = 0; + args[i].ndallocs = 0; + args[i].ndallocs_fail = 0; + memset( + &args[i].edata[0], 0, NOPS_PER_THREAD * sizeof(edata_t *)); + thd_create(&thds[i], thd_trylock_test, &args[i]); + } + + for (unsigned i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } + + /* Wait for all threads to complete */ + size_t total_allocs = 0; + size_t total_dallocs = 0; + size_t total_allocs_fail = 0; + for (unsigned i = 0; i < NTHREADS; i++) { + total_allocs += args[i].nallocs; + total_dallocs += args[i].ndallocs; + total_allocs_fail += args[i].nallocs_fail; + } + + /* We must have at least some hits */ + expect_zu_gt(total_allocs, 0, ""); /* - * Check that the stats merging accumulates rather than overwrites by - * putting some (made up) data there to begin with. + * We must have at least some successful dallocs by design (max_bytes is + * big enough). 
*/ - stats.bytes = 123; - sec_stats_merge(tsdn, sec, &stats); - assert_zu_le(npages * PAGE + 123, stats.bytes, ""); -} + expect_zu_gt(total_dallocs, 0, ""); -TEST_BEGIN(test_stats_simple) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; + /* Get final stats to verify that hits and misses are accurate */ + sec_stats_t stats = {0}; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.total.nhits, total_allocs, ""); + expect_zu_eq(stats.total.nmisses, total_allocs_fail, ""); - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { - NITERS = 100, - FLUSH_PAGES = 20, - }; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *allocs[FLUSH_PAGES]; - for (size_t i = 0; i < FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, 0); - } - - /* Increase and decrease, without flushing. */ - for (size_t i = 0; i < NITERS; i++) { - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - pai_dalloc(tsdn, &sec.pai, allocs[j], - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, j + 1); - } - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); - } - } -} -TEST_END - -TEST_BEGIN(test_stats_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - enum { - FLUSH_PAGES = 10, - }; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *extra_alloc0; - edata_t *extra_alloc1; - edata_t *allocs[2 * FLUSH_PAGES]; - - bool deferred_work_generated = false; - - extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - - for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - } - - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated); - - /* Flush the remaining pages; stats should still work. */ - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i], - &deferred_work_generated); - } - - pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); - - expect_stats_pages(tsdn, &sec, - ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count - - ta.dalloc_batch_count); -} -TEST_END - -TEST_BEGIN(test_stats_manual_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
 */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	enum {
-		FLUSH_PAGES = 10,
-	};
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ FLUSH_PAGES * PAGE);
-
-	bool deferred_work_generated = false;
-	edata_t *allocs[FLUSH_PAGES];
-	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, 0);
-	}
-
-	/* Dalloc the first half of the allocations. */
-	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, i + 1);
-	}
-
-	sec_flush(tsdn, &sec);
-	expect_stats_pages(tsdn, &sec, 0);
-
-	/* Flush the remaining pages. */
-	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i],
-		    &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, i + 1);
-	}
-	sec_disable(tsdn, &sec);
-	expect_stats_pages(tsdn, &sec, 0);
+	destroy_test_data(tsdn, &tdata);
 }
 TEST_END
 
 int
 main(void) {
-	return test(test_reuse, test_auto_flush, test_disable, test_flush,
-	    test_max_alloc_respected, test_expand_shrink_delegate,
-	    test_nshards_0, test_stats_simple, test_stats_auto_flush,
-	    test_stats_manual_flush);
+	return test(test_max_nshards_option_zero,
+	    test_max_alloc_option_too_small, test_sec_fill, test_sec_alloc,
+	    test_sec_dalloc, test_max_bytes_too_low, test_sec_flush,
+	    test_sec_stats, test_sec_multishard);
 }

From 365747bc8d1cf202342d905555d7cd360f9ba118 Mon Sep 17 00:00:00 2001
From: Carl Shapiro
Date: Mon, 22 Dec 2025 20:36:03 -0800
Subject: [PATCH 352/395] Use the BRE construct \{1,\} for one or more
 consecutive matches

This removes duplication introduced by my earlier commit that
eliminated the use of the non-standard "\+" from BREs in the configure
script.
--- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 5e907511..897f1719 100644 --- a/configure.ac +++ b/configure.ac @@ -652,7 +652,7 @@ AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], [Version string])], [ - echo "${with_version}" | grep ['^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*-[0-9][0-9]*-g[0-9a-f][0-9a-f]*$'] 2>&1 1>/dev/null + echo "${with_version}" | grep ['^[0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}-[0-9]\{1,\}-g[0-9a-f]\{1,\}$'] 2>&1 1>/dev/null if test $? -eq 0 ; then echo "$with_version" > "${objroot}VERSION" else @@ -2059,7 +2059,7 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then dnl Hugepagesize: 2048 kB if test -e "/proc/meminfo" ; then hpsk=[`cat /proc/meminfo 2>/dev/null | \ - grep '^Hugepagesize:[[:space:]][[:space:]]*[0-9][0-9]*[[:space:]]kB$' | \ + grep '^Hugepagesize:[[:space:]]\{1,\}[0-9]\{1,\}[[:space:]]kB$' | \ awk '{print $2}'`] if test "x${hpsk}" != "x" ; then je_cv_lg_hugepage=10 From 5f353dc28383d070ffa540d1679153f8101e2aa7 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Tue, 23 Dec 2025 14:18:43 -0800 Subject: [PATCH 353/395] Remove an incorrect use of the address operator The address of the local variable created_threads is a different location than the data it points to. Incorrectly treating these values as being the same can cause out-of-bounds writes to the stack. 
Closes: facebook/jemalloc#59 --- src/background_thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 2eb08dd2..82911ee7 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -448,7 +448,7 @@ background_thread0_work(tsd_t *tsd) { } if (check_background_thread_creation(tsd, const_max_background_threads, &n_created, - (bool *)&created_threads)) { + created_threads)) { continue; } background_work_sleep_once( From c51abba131e7665e05da0de60c66fb219976050d Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Tue, 20 Jan 2026 18:56:32 -0800 Subject: [PATCH 354/395] Determine the page size on Android from NDK header files The definition of the PAGE_SIZE macro is used as a signal for a 32-bit target or a 64-bit target with an older NDK. Otherwise, a 16KiB page size is assumed. Closes: #2657 --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 897f1719..376779b0 100644 --- a/configure.ac +++ b/configure.ac @@ -1990,6 +1990,11 @@ case "${host}" in LG_PAGE=14 fi ;; + *-*-linux-android) + if test "x$LG_PAGE" = "xdetect"; then + AC_CHECK_DECLS([PAGE_SIZE], [LG_PAGE=12], [LG_PAGE=14], [#include ]) + fi + ;; aarch64-unknown-linux-*) if test "x$LG_PAGE" = "xdetect"; then LG_PAGE=16 From d4908fe44a869858840fc7b9d4d3e69a3629a25f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 5 Feb 2026 19:18:19 -0800 Subject: [PATCH 355/395] Revert "Experimental configuration option for fast path prefetch from cache_bin" This reverts commit f9fae9f1f841f8c6c566746480865da8ae3a1d11. 
--- configure.ac | 30 ------------------- .../internal/jemalloc_internal_defs.h.in | 5 ---- .../internal/jemalloc_internal_inlines_c.h | 6 ---- 3 files changed, 41 deletions(-) diff --git a/configure.ac b/configure.ac index 376779b0..ae206a19 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,36 +1434,6 @@ if test "x$enable_experimental_smallocx" = "x1" ; then fi AC_SUBST([enable_experimental_smallocx]) -dnl Do not enable fastpath prefetch by default. -AC_ARG_ENABLE([experimental_fp_prefetch], - [AS_HELP_STRING([--enable-experimental-fp-prefetch], [Enable experimental fastpath prefetch])], -[if test "x$enable_experimental_fp_prefetch" = "xno" ; then -enable_experimental_fp_prefetch="0" -else - dnl Check if we have __builtin_prefetch. - JE_CFLAGS_SAVE() - JE_CFLAGS_ADD([-Werror=implicit-function-declaration]) - JE_COMPILABLE([builtin prefetch], [], [ -void foo(void *p) { __builtin_prefetch(p, 1, 3); } - ], - [je_cv_have_builtin_prefetch]) - - if test "x${je_cv_have_builtin_prefetch}" = "xyes" ; then - enable_experimental_fp_prefetch="1" - else - enable_experimental_fp_prefetch="0" - AC_MSG_ERROR([--enable--experimental-fp-prefetch can only be used when builtin_preftech is available]) - fi - JE_CFLAGS_RESTORE() -fi -], -[enable_experimental_fp_prefetch="0"] -) -if test "x$enable_experimental_fp_prefetch" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ]) -fi -AC_SUBST([enable_experimental_fp_prefetch]) - dnl Do not enable profiling by default. AC_ARG_ENABLE([prof], [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3a945ba1..31ae2e8e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -160,11 +160,6 @@ /* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. 
*/ #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -/* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables prefetch - * on malloc fast path. - */ -#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH - /* JEMALLOC_PROF enables allocation profiling. */ #undef JEMALLOC_PROF diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 16f86ad4..2c61f8c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -374,12 +374,6 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { */ ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { -#if defined(JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH) - cache_bin_sz_t lb = (cache_bin_sz_t)(uintptr_t)bin->stack_head; - if(likely(lb != bin->low_bits_empty)) { - util_prefetch_write_range(*(bin->stack_head), usize); - } -#endif fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } From 4d0ffa075b93fe9263cfd5f11467b2e8df44ed93 Mon Sep 17 00:00:00 2001 From: Andrei Pechkurov Date: Fri, 9 Jan 2026 21:55:45 +0200 Subject: [PATCH 356/395] Fix background thread initialization race --- Makefile.in | 1 + src/background_thread.c | 8 +- test/unit/background_thread_init.c | 183 +++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 test/unit/background_thread_init.c diff --git a/Makefile.in b/Makefile.in index 83f04e64..4b5b6507 100644 --- a/Makefile.in +++ b/Makefile.in @@ -206,6 +206,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ + $(srcroot)test/unit/background_thread_init.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/binshard.c \ diff --git a/src/background_thread.c b/src/background_thread.c index 82911ee7..4901856a 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ 
-547,8 +547,13 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + /* + * The last check is there to leave Thread 0 creation entirely + * to the initializing thread (arena 0). + */ need_new_thread = background_thread_enabled() - && (info->state == background_thread_stopped); + && (info->state == background_thread_stopped) + && (thread_ind != 0 || arena_ind == 0); if (need_new_thread) { background_thread_init(tsd, info); } @@ -560,7 +565,6 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { /* Threads are created asynchronously by Thread 0. */ background_thread_info_t *t0 = &background_thread_info[0]; malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); - assert(t0->state == background_thread_started); pthread_cond_signal(&t0->cond); malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); diff --git a/test/unit/background_thread_init.c b/test/unit/background_thread_init.c new file mode 100644 index 00000000..169b96c7 --- /dev/null +++ b/test/unit/background_thread_init.c @@ -0,0 +1,183 @@ +#include "test/jemalloc_test.h" + +/* + * Test to verify that background thread initialization has no race conditions. + * + * See https://github.com/facebook/jemalloc/pull/68 + */ + +#ifdef JEMALLOC_BACKGROUND_THREAD +const char *malloc_conf = "background_thread:true,percpu_arena:percpu"; +#else +const char *malloc_conf = ""; +#endif + +#define N_INIT_THREADS 32 +#define N_ITERATIONS 10 + +static mtx_t barrier_mtx; +static atomic_u32_t n_waiting; +static unsigned n_threads; +static atomic_b_t release; + +/* + * Simple spin barrier - all threads wait until everyone arrives, + * then they all proceed to call malloc() simultaneously. 
+ */ +static void +barrier_wait(void) { + mtx_lock(&barrier_mtx); + uint32_t waiting = atomic_load_u32(&n_waiting, ATOMIC_RELAXED) + 1; + atomic_store_u32(&n_waiting, waiting, ATOMIC_RELAXED); + bool should_release = (waiting == n_threads); + mtx_unlock(&barrier_mtx); + + if (should_release) { + atomic_store_b(&release, true, ATOMIC_RELEASE); + } + + while (!atomic_load_b(&release, ATOMIC_ACQUIRE)) { + /* Spin until released. */ + } +} + +static void +barrier_reset(void) { + atomic_store_u32(&n_waiting, 0, ATOMIC_RELAXED); + atomic_store_b(&release, false, ATOMIC_RELAXED); +} + +static void * +thd_start(void *arg) { + barrier_wait(); + + /* + * All threads race to malloc simultaneously. + * This triggers concurrent arena initialization with percpu_arena. + */ + void *p = malloc(64); + expect_ptr_not_null(p, "malloc failed"); + free(p); + + return NULL; +} + +TEST_BEGIN(test_mt_background_thread_init) { + test_skip_if(!have_background_thread); + test_skip_if(!have_percpu_arena || + !PERCPU_ARENA_ENABLED(opt_percpu_arena)); + + thd_t thds[N_INIT_THREADS]; + + expect_false(mtx_init(&barrier_mtx), "mtx_init failed"); + n_threads = N_INIT_THREADS; + barrier_reset(); + + /* Create threads that will all race to call malloc(). */ + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_create(&thds[i], thd_start, NULL); + } + + /* Wait for all threads to complete. */ + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_join(thds[i], NULL); + } + + mtx_fini(&barrier_mtx); + + /* + * Verify background threads are properly running. Before the fix, + * the race could leave Thread 0 marked as "started" without an + * actual pthread behind it. 
+ */ +#ifdef JEMALLOC_BACKGROUND_THREAD + tsd_t *tsd = tsd_fetch(); + background_thread_info_t *t0 = &background_thread_info[0]; + + malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); + expect_d_eq(t0->state, background_thread_started, + "Thread 0 should be in started state"); + malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); + + expect_zu_gt(n_background_threads, 0, + "At least one background thread should be running"); +#endif +} +TEST_END + +TEST_BEGIN(test_mt_background_thread_init_stress) { + test_skip_if(!have_background_thread); + test_skip_if(!config_stats); + + thd_t thds[N_INIT_THREADS]; + + expect_false(mtx_init(&barrier_mtx), "mtx_init failed"); + n_threads = N_INIT_THREADS; + + /* + * Run multiple iterations to increase the chance of hitting + * any race conditions. Each iteration creates new threads that + * perform allocations concurrently. + */ + for (unsigned iter = 0; iter < N_ITERATIONS; iter++) { + barrier_reset(); + + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_create(&thds[i], thd_start, NULL); + } + + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_join(thds[i], NULL); + } + } + + mtx_fini(&barrier_mtx); + +#ifdef JEMALLOC_BACKGROUND_THREAD + /* + * Verify Thread 0 is actually running by checking it has done work. + * Wait up to a few seconds for the background thread to run. + */ + tsd_t *tsd = tsd_fetch(); + background_thread_info_t *t0 = &background_thread_info[0]; + + nstime_t start; + nstime_init_update(&start); + + bool ran = false; + while (!ran) { + malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); + if (t0->tot_n_runs > 0) { + ran = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); + + if (ran) { + break; + } + + nstime_t now; + nstime_init_update(&now); + nstime_subtract(&now, &start); + if (nstime_sec(&now) > 10) { + /* + * If Thread 0 hasn't run after 10 seconds, it's + * likely not actually running (the bug condition). 
+ */ + expect_true(false, + "Thread 0 did not run within 10 seconds - " + "possible initialization race"); + break; + } + sleep_ns(100 * 1000 * 1000); /* 100ms */ + } +#endif +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_mt_background_thread_init, + test_mt_background_thread_init_stress); +} From 34ace9169bad794cea6f8639e188d83b42310762 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 12 Feb 2026 11:49:28 -0800 Subject: [PATCH 357/395] Remove prof_threshold built-in event. It is trivial to implement it as user event if needed --- Makefile.in | 3 - include/jemalloc/internal/prof_externs.h | 5 - include/jemalloc/internal/prof_hook.h | 6 - include/jemalloc/internal/prof_threshold.h | 8 -- .../jemalloc/internal/thread_event_registry.h | 1 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/ctl.c | 30 ----- src/jemalloc.c | 5 - src/prof_threshold.c | 69 ----------- src/thread_event.c | 9 -- src/thread_event_registry.c | 3 +- test/unit/mallctl.c | 1 - test/unit/prof_threshold.c | 112 ------------------ test/unit/prof_threshold_small.c | 2 - test/unit/prof_threshold_small.sh | 1 - 22 files changed, 1 insertion(+), 270 deletions(-) delete mode 100644 include/jemalloc/internal/prof_threshold.h delete mode 100644 src/prof_threshold.c delete mode 100644 test/unit/prof_threshold.c delete mode 100644 test/unit/prof_threshold_small.c delete mode 100644 test/unit/prof_threshold_small.sh diff --git a/Makefile.in b/Makefile.in index 4b5b6507..9db36530 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,7 +145,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_stack_range.c \ 
$(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ - $(srcroot)src/prof_threshold.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ @@ -271,8 +270,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ - $(srcroot)test/unit/prof_threshold.c \ - $(srcroot)test/unit/prof_threshold_small.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e41e30a0..e07e69f5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -12,8 +12,6 @@ extern bool opt_prof_active; extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern size_t - opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. 
*/ @@ -70,9 +68,6 @@ prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); -void prof_threshold_hook_set(prof_threshold_hook_t hook); -prof_threshold_hook_t prof_threshold_hook_get(void); - /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 69dfaabf..d5a9b0ff 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -27,10 +27,4 @@ typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); -/* - * A callback hook that notifies when an allocation threshold has been crossed. - */ -typedef void (*prof_threshold_hook_t)( - uint64_t alloc, uint64_t dealloc, uint64_t peak); - #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h deleted file mode 100644 index 93e9478e..00000000 --- a/include/jemalloc/internal/prof_threshold.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_THRESHOLD_EVENT_H -#define JEMALLOC_INTERNAL_THRESHOLD_EVENT_H - -#include "jemalloc/internal/tsd_types.h" - -extern te_base_cb_t prof_threshold_te_handler; - -#endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index 7ded440d..bfb140aa 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -14,7 +14,6 @@ enum te_alloc_e { te_alloc_stats_interval, te_alloc_tcache_gc, #ifdef JEMALLOC_STATS - te_alloc_prof_threshold, te_alloc_peak, #endif te_alloc_user0, diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 1e8def75..ca2a8532 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 45ddf73d..c5d1116b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index f1a5158a..4df570c8 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index a6f92ccf..5e256ec6 100644 --- 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/src/ctl.c b/src/ctl.c index 1260e197..4cac5608 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -156,7 +156,6 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) -CTL_PROTO(opt_experimental_lg_prof_threshold) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) @@ -364,7 +363,6 @@ CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) -CTL_PROTO(experimental_hooks_prof_threshold) CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) @@ -527,8 +525,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("experimental_lg_prof_threshold"), - CTL(opt_experimental_lg_prof_threshold)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, @@ -890,7 +886,6 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, - 
{NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; @@ -2236,8 +2231,6 @@ CTL_RO_NL_CGEN( config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, - opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN( config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) @@ -3681,29 +3674,6 @@ label_return: return ret; } -static int -experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - - if (oldp == NULL && newp == NULL) { - ret = EINVAL; - goto label_return; - } - if (oldp != NULL) { - prof_threshold_hook_t old_hook = prof_threshold_hook_get(); - READ(old_hook, prof_threshold_hook_t); - } - if (newp != NULL) { - prof_threshold_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_hook, prof_threshold_hook_t); - prof_threshold_hook_set(new_hook); - } - ret = 0; -label_return: - return ret; -} - static int experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d23962d..d82788eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1747,11 +1747,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) - CONF_HANDLE_SIZE_T( - opt_experimental_lg_prof_threshold, - "experimental_lg_prof_threshold", 0, - (sizeof(uint64_t) << 3) - 1, - CONF_DONT_CHECK_MIN, 
CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", 1, PROF_BT_MAX_LIMIT, diff --git a/src/prof_threshold.c b/src/prof_threshold.c deleted file mode 100644 index 5b72a491..00000000 --- a/src/prof_threshold.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/activity_callback.h" -#include "jemalloc/internal/prof_threshold.h" - -#include "jemalloc/internal/prof_externs.h" - -/* - * Update every 128MB by default. - */ -#define PROF_THRESHOLD_LG_WAIT_DEFAULT 27 - -/* Logically a prof_threshold_hook_t. */ -static atomic_p_t prof_threshold_hook; -size_t opt_experimental_lg_prof_threshold = PROF_THRESHOLD_LG_WAIT_DEFAULT; - -void -prof_threshold_hook_set(prof_threshold_hook_t hook) { - atomic_store_p(&prof_threshold_hook, hook, ATOMIC_RELEASE); -} - -prof_threshold_hook_t -prof_threshold_hook_get(void) { - return (prof_threshold_hook_t)atomic_load_p( - &prof_threshold_hook, ATOMIC_ACQUIRE); -} - -/* Invoke callback for threshold reached */ -static inline void -prof_threshold_update(tsd_t *tsd) { - prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); - if (prof_threshold_hook == NULL) { - return; - } - uint64_t alloc = tsd_thread_allocated_get(tsd); - uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); - pre_reentrancy(tsd, NULL); - prof_threshold_hook(alloc, dalloc, peak->cur_max); - post_reentrancy(tsd); -} - -uint64_t -prof_threshold_new_event_wait(tsd_t *tsd) { - return 1 << opt_experimental_lg_prof_threshold; -} - -uint64_t -prof_threshold_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - -void -prof_threshold_event_handler(tsd_t *tsd) { - prof_threshold_update(tsd); -} - -static te_enabled_t -prof_threshold_enabled(void) { - return config_stats ? 
te_enabled_yes : te_enabled_no; -} - -te_base_cb_t prof_threshold_te_handler = { - .enabled = &prof_threshold_enabled, - .new_event_wait = &prof_threshold_new_event_wait, - .postponed_event_wait = &prof_threshold_postponed_event_wait, - .event_handler = &prof_threshold_event_handler, -}; diff --git a/src/thread_event.c b/src/thread_event.c index c59027ed..82776342 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -290,15 +290,6 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; } - assert(te_enabled_yes - == te_alloc_handlers[te_alloc_prof_threshold]->enabled()); - if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_threshold], wait, - te_alloc_handlers[te_alloc_prof_threshold], - 1 << opt_experimental_lg_prof_threshold)) { - to_trigger[nto_trigger++] = - te_alloc_handlers[te_alloc_prof_threshold]; - } #endif for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index 05882616..b8307df0 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -6,7 +6,6 @@ #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_threshold.h" #include "jemalloc/internal/stats.h" static malloc_mutex_t uevents_mu; @@ -149,7 +148,7 @@ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #endif &stats_interval_te_handler, &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, &peak_te_handler, + &peak_te_handler, #endif &user_alloc_handler0, &user_alloc_handler1, &user_alloc_handler2, &user_alloc_handler3}; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4c11e485..4cd0225b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -342,7 +342,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_active, prof); 
TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); - TEST_MALLCTL_OPT(ssize_t, experimental_lg_prof_threshold, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c deleted file mode 100644 index a31a5a24..00000000 --- a/test/unit/prof_threshold.c +++ /dev/null @@ -1,112 +0,0 @@ -#include "test/jemalloc_test.h" - -/* Test config (set in reset_test_config) */ -#define ALLOC_ITERATIONS_IN_THRESHOLD 10 -uint64_t threshold_bytes = 0; -uint64_t chunk_size = 0; - -/* Test globals for calblack */ -uint64_t hook_calls = 0; -uint64_t last_peak = 0; -uint64_t last_alloc = 0; -uint64_t alloc_baseline = 0; - -void -mock_prof_threshold_hook(uint64_t alloc, uint64_t dealloc, uint64_t peak) { - hook_calls++; - last_peak = peak; - last_alloc = alloc; -} - -/* Need the do_write flag because NULL is a valid to_write value. */ -static void -read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, - prof_threshold_hook_t to_write) { - size_t hook_sz = sizeof(prof_threshold_hook_t); - expect_d_eq( - mallctl("experimental.hooks.prof_threshold", (void *)to_read, - &hook_sz, do_write ? 
&to_write : NULL, hook_sz), - 0, "Unexpected prof_threshold_hook mallctl failure"); -} - -static void -write_prof_threshold_hook(prof_threshold_hook_t new_hook) { - read_write_prof_threshold_hook(NULL, true, new_hook); -} - -static prof_threshold_hook_t -read_prof_threshold_hook() { - prof_threshold_hook_t hook; - read_write_prof_threshold_hook(&hook, false, NULL); - return hook; -} - -static void -reset_test_config() { - hook_calls = 0; - last_peak = 0; - alloc_baseline = last_alloc; /* We run the test multiple times */ - last_alloc = 0; - threshold_bytes = 1 << opt_experimental_lg_prof_threshold; - chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; -} - -static void -expect_threshold_calls(int calls) { - expect_u64_eq( - hook_calls, calls, "Hook called the right amount of times"); - expect_u64_lt( - last_peak, chunk_size * 2, "We allocate chunk_size at a time"); - expect_u64_ge( - last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); -} - -static void -allocate_chunks(int chunks) { - for (int i = 0; i < chunks; i++) { - void *p = mallocx((size_t)chunk_size, 0); - expect_ptr_not_null(p, "Failed to allocate"); - free(p); - } -} - -TEST_BEGIN(test_prof_threshold_hook) { - test_skip_if(!config_stats); - - /* Test setting and reading the hook (both value and null) */ - write_prof_threshold_hook(mock_prof_threshold_hook); - expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, - "Unexpected hook"); - - write_prof_threshold_hook(NULL); - expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); - - /* Reset everything before the test */ - reset_test_config(); - write_prof_threshold_hook(mock_prof_threshold_hook); - - int err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); - expect_d_eq(err, 0, "Peak reset failed"); - - /* Note that since we run this test multiple times and we don't reset - the allocation counter, each time we offset the callback by the - amount we allocate over the threshold. 
*/ - - /* A simple small allocation is not enough to trigger the callback */ - allocate_chunks(1); - expect_u64_eq(hook_calls, 0, "Hook not called yet"); - - /* Enough allocations to trigger the callback */ - allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); - expect_threshold_calls(1); - - /* Enough allocations to trigger the callback again */ - allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); - expect_threshold_calls(2); -} -TEST_END - -int -main(void) { - return test(test_prof_threshold_hook); -} diff --git a/test/unit/prof_threshold_small.c b/test/unit/prof_threshold_small.c deleted file mode 100644 index 67f444b1..00000000 --- a/test/unit/prof_threshold_small.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "test/jemalloc_test.h" -#include "prof_threshold.c" diff --git a/test/unit/prof_threshold_small.sh b/test/unit/prof_threshold_small.sh deleted file mode 100644 index 62726069..00000000 --- a/test/unit/prof_threshold_small.sh +++ /dev/null @@ -1 +0,0 @@ -export MALLOC_CONF="experimental_lg_prof_threshold:22" From 0fa27fd28fd75fc3305d61c742ed028c5b874231 Mon Sep 17 00:00:00 2001 From: Tony Printezis Date: Thu, 19 Feb 2026 12:42:52 -0800 Subject: [PATCH 358/395] Run single subtest from a test file Add mechanism to be able to select a test to run from a test file. The test harness will read the JEMALLOC_TEST_NAME env and, if set, it will only run subtests with that name. 
--- test/include/test/test.h | 11 +++++++---- test/src/test.c | 18 ++++++++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 025c167d..79f47e98 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -520,12 +520,15 @@ typedef void(test_t)(void); #define TEST_BEGIN(f) \ static void f(void) { \ - p_test_init(#f); + const bool skip_test = p_test_init(#f); \ + if (skip_test) { \ + goto label_test_end; \ + } #define TEST_END \ goto label_test_end; \ label_test_end: \ - p_test_fini(); \ + p_test_fini(skip_test); \ } #define test(...) p_test(__VA_ARGS__, NULL) @@ -552,6 +555,6 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); test_status_t p_test(test_t *t, ...); test_status_t p_test_no_reentrancy(test_t *t, ...); test_status_t p_test_no_malloc_init(test_t *t, ...); -void p_test_init(const char *name); -void p_test_fini(void); +bool p_test_init(const char *name); +void p_test_fini(bool skip_test); void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 6eb84338..e5e33ae6 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -6,6 +6,7 @@ static unsigned test_count = 0; static test_status_t test_counts[test_status_count] = {0, 0, 0}; static test_status_t test_status = test_status_pass; static const char *test_name = ""; +static const char *selected_test_name = NULL; /* Reentrancy testing helpers. 
*/ @@ -100,15 +101,26 @@ test_status_string(test_status_t current_status) { } } -void +bool p_test_init(const char *name) { + if (selected_test_name != NULL && strcmp(selected_test_name, name)) { + /* skip test */ + return true; + } + test_count++; test_status = test_status_pass; test_name = name; + + return false; } void -p_test_fini(void) { +p_test_fini(bool skip_test) { + if (skip_test) { + return; + } + test_counts[test_status]++; malloc_printf("%s (%s): %s\n", test_name, reentrancy_t_str(reentrancy), test_status_string(test_status)); @@ -130,6 +142,8 @@ check_global_slow(test_status_t *status) { static test_status_t p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { + selected_test_name = getenv("JEMALLOC_TEST_NAME"); + test_status_t ret; if (do_malloc_init) { From a10ef3e1f1c7593fb1cb211329e02c542af14694 Mon Sep 17 00:00:00 2001 From: Yuxuan Chen Date: Tue, 24 Feb 2026 18:12:56 -0800 Subject: [PATCH 359/395] configure: add --with-cxx-stdlib option When C++ support is enabled, configure unconditionally probes `-lstdc++` and keeps it in LIBS if the link test succeeds. On platforms using libc++, this probe can succeed at compile time (if libstdc++ headers/libraries happen to be installed) but then cause runtime failures when configure tries to execute test binaries because `libstdc++.so.6` isn't actually available. Add a `--with-cxx-stdlib=` option that lets the build system specify which C++ standard library to link. When given, the probe is skipped and the specified library is linked directly. When not given, the original probe behavior is preserved. 
--- configure.ac | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index ae206a19..e57d0667 100644 --- a/configure.ac +++ b/configure.ac @@ -324,6 +324,15 @@ fi , enable_cxx="1" ) +AC_ARG_WITH([cxx_stdlib], + [AS_HELP_STRING([--with-cxx-stdlib=], + [Specify the C++ standard library to link (default: probe for libstdc++)])], + [case "${with_cxx_stdlib}" in + libstdc++|libcxx) ;; + *) AC_MSG_ERROR([bad value ${with_cxx_stdlib} for --with-cxx-stdlib]) ;; + esac], + [with_cxx_stdlib=""] +) if test "x$enable_cxx" = "x1" ; then dnl Require at least c++14, which is the first version to support sized dnl deallocation. C++ support is not compiled otherwise. @@ -338,17 +347,28 @@ if test "x$enable_cxx" = "x1" ; then JE_CXXFLAGS_ADD([-g3]) SAVED_LIBS="${LIBS}" - JE_APPEND_VS(LIBS, -lstdc++) - JE_COMPILABLE([libstdc++ linkage], [ + case "${with_cxx_stdlib}" in + libstdc++) + JE_APPEND_VS(LIBS, -lstdc++) + ;; + libcxx) + JE_APPEND_VS(LIBS, -lc++) + ;; + *) + dnl Probe for libstdc++ (the default when --with-cxx-stdlib is not given). + JE_APPEND_VS(LIBS, -lstdc++) + JE_COMPILABLE([libstdc++ linkage], [ #include ], [[ int *arr = (int *)malloc(sizeof(int) * 42); if (arr == NULL) return 1; ]], [je_cv_libstdcxx]) - if test "x${je_cv_libstdcxx}" = "xno" ; then - LIBS="${SAVED_LIBS}" - fi + if test "x${je_cv_libstdcxx}" = "xno" ; then + LIBS="${SAVED_LIBS}" + fi + ;; + esac else enable_cxx="0" fi From 79cc7dcc827bb506f5be0345df2a7ce356b84165 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Wed, 25 Feb 2026 13:00:42 -0800 Subject: [PATCH 360/395] Guard os_page_id against a NULL address While undocumented, the prctl system call will set errno to ENOMEM when passed NULL as an address. Under that condition, an assertion that check for EINVAL as the only possible errno value will fail. To avoid the assertion failure, this change skips the call to os_page_id when address is NULL. 
NULL can only occur after mmap fails in which case there is no mapping to name. --- src/pages.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/pages.c b/src/pages.c index e7766fcc..2a4f0093 100644 --- a/src/pages.c +++ b/src/pages.c @@ -113,8 +113,12 @@ os_page_id(void *addr, size_t size, const char *name) { * While parsing `/proc//maps` file, the block could appear as * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` */ - return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, + int n; + assert(addr != NULL); + n = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, (uintptr_t)name); + assert(n == 0 || (n == -1 && get_errno() == EINVAL)); + return n; # else return 0; # endif @@ -187,9 +191,10 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); #ifdef JEMALLOC_PAGEID - int n = os_page_id(ret, size, - os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); - assert(n == 0 || (n == -1 && get_errno() == EINVAL)); + if (ret != NULL) { + os_page_id(ret, size, + os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); + } #endif return ret; } From 12b33ed8f1a776ea36a5bafa14c65461b9efa64d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Feb 2026 17:39:57 -0800 Subject: [PATCH 361/395] Fix wrong mutex stats in json-formatted malloc stats During mutex stats emit, derived counters are not emitted for json. Yet the array indexing counter should still be increased to skip derived elements in the output, which was not. This commit fixes it. 
--- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index be70a6fc..22b412bd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -260,10 +260,10 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #define OP(counter, type, human, derived, base_counter) \ if (!derived) { \ col = &col_##type[k_##type]; \ - ++k_##type; \ emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ (const void *)&col->bool_val); \ - } + } \ + ++k_##type; MUTEX_PROF_COUNTERS; #undef OP #undef EMITTER_TYPE_uint32_t From c73ab1c2ff9c47ad56c2d550b7481bbc80119bcb Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sat, 21 Feb 2026 11:36:55 -0800 Subject: [PATCH 362/395] Add a test to check the output in JSON-based stats is consistent with mallctl results. --- Makefile.in | 1 + test/unit/json_stats.c | 243 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 test/unit/json_stats.c diff --git a/Makefile.in b/Makefile.in index 9db36530..f916ad71 100644 --- a/Makefile.in +++ b/Makefile.in @@ -240,6 +240,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/json_stats.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_conf_2.c \ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c new file mode 100644 index 00000000..ea8a170b --- /dev/null +++ b/test/unit/json_stats.c @@ -0,0 +1,243 @@ +#include "test/jemalloc_test.h" + +typedef struct { + char *buf; + size_t len; + size_t capacity; +} stats_buf_t; + +static void +stats_buf_init(stats_buf_t *sbuf) { + /* 1MB buffer should be enough since per-arena stats are omitted. 
*/ + sbuf->capacity = 1 << 20; + sbuf->buf = mallocx(sbuf->capacity, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(sbuf->buf, "Failed to allocate stats buffer"); + sbuf->len = 0; + sbuf->buf[0] = '\0'; +} + +static void +stats_buf_fini(stats_buf_t *sbuf) { + dallocx(sbuf->buf, MALLOCX_TCACHE_NONE); +} + +static void +stats_buf_write_cb(void *opaque, const char *str) { + stats_buf_t *sbuf = (stats_buf_t *)opaque; + size_t slen = strlen(str); + + if (sbuf->len + slen + 1 > sbuf->capacity) { + return; + } + memcpy(&sbuf->buf[sbuf->len], str, slen + 1); + sbuf->len += slen; +} + +static bool +json_extract_uint64(const char *json, const char *key, uint64_t *result) { + char search_key[128]; + size_t key_len; + + key_len = snprintf(search_key, sizeof(search_key), "\"%s\":", key); + if (key_len >= sizeof(search_key)) { + return true; + } + + const char *pos = strstr(json, search_key); + if (pos == NULL) { + return true; + } + + pos += key_len; + while (*pos == ' ' || *pos == '\t' || *pos == '\n') { + pos++; + } + + char *endptr; + uint64_t value = strtoull(pos, &endptr, 10); + if (endptr == pos) { + return true; + } + + *result = value; + return false; +} + +static const char * +json_find_section(const char *json, const char *section_name) { + char search_pattern[128]; + size_t pattern_len; + + pattern_len = snprintf( + search_pattern, sizeof(search_pattern), "\"%s\":", section_name); + if (pattern_len >= sizeof(search_pattern)) { + return NULL; + } + + return strstr(json, search_pattern); +} + +static void +verify_mutex_json(const char *mutexes_section, const char *mallctl_prefix, + const char *mutex_name) { + char mallctl_path[128]; + size_t sz; + + const char *mutex_section = json_find_section( + mutexes_section, mutex_name); + expect_ptr_not_null(mutex_section, + "Could not find %s mutex section in JSON", mutex_name); + + uint64_t ctl_num_ops, ctl_num_wait, ctl_num_spin_acq; + uint64_t ctl_num_owner_switch, ctl_total_wait_time, ctl_max_wait_time; + uint32_t 
ctl_max_num_thds; + + sz = sizeof(uint64_t); + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_ops", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_ops, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_wait", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_wait, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_spin_acq", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_spin_acq, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_owner_switch", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_owner_switch, &sz, NULL, 0), + 0, "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.total_wait_time", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_total_wait_time, &sz, NULL, 0), + 0, "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_wait_time", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_max_wait_time, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + sz = sizeof(uint32_t); + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_num_thds", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_max_num_thds, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + uint64_t json_num_ops, json_num_wait, json_num_spin_acq; + uint64_t json_num_owner_switch, json_total_wait_time, + json_max_wait_time; + uint64_t json_max_num_thds; + + expect_false( + json_extract_uint64(mutex_section, "num_ops", &json_num_ops), + "%s: num_ops not found in JSON", mutex_name); + expect_false( + 
json_extract_uint64(mutex_section, "num_wait", &json_num_wait), + "%s: num_wait not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "num_spin_acq", &json_num_spin_acq), + "%s: num_spin_acq not found in JSON", mutex_name); + expect_false(json_extract_uint64(mutex_section, "num_owner_switch", + &json_num_owner_switch), + "%s: num_owner_switch not found in JSON", mutex_name); + expect_false(json_extract_uint64(mutex_section, "total_wait_time", + &json_total_wait_time), + "%s: total_wait_time not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "max_wait_time", &json_max_wait_time), + "%s: max_wait_time not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "max_num_thds", &json_max_num_thds), + "%s: max_num_thds not found in JSON", mutex_name); + + expect_u64_eq(json_num_ops, ctl_num_ops, + "%s: JSON num_ops doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_wait, ctl_num_wait, + "%s: JSON num_wait doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_spin_acq, ctl_num_spin_acq, + "%s: JSON num_spin_acq doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_owner_switch, ctl_num_owner_switch, + "%s: JSON num_owner_switch doesn't match mallctl", mutex_name); + expect_u64_eq(json_total_wait_time, ctl_total_wait_time, + "%s: JSON total_wait_time doesn't match mallctl", mutex_name); + expect_u64_eq(json_max_wait_time, ctl_max_wait_time, + "%s: JSON max_wait_time doesn't match mallctl", mutex_name); + expect_u32_eq((uint32_t)json_max_num_thds, ctl_max_num_thds, + "%s: JSON max_num_thds doesn't match mallctl", mutex_name); +} + +static const char *global_mutex_names[] = {"background_thread", + "max_per_bg_thd", "ctl", "prof", "prof_thds_data", "prof_dump", + "prof_recent_alloc", "prof_recent_dump", "prof_stats"}; +static const size_t num_global_mutexes = sizeof(global_mutex_names) + / sizeof(global_mutex_names[0]); + +static const char 
*arena_mutex_names[] = {"large", "extent_avail", + "extents_dirty", "extents_muzzy", "extents_retained", "decay_dirty", + "decay_muzzy", "base", "tcache_list", "hpa_shard", "hpa_shard_grow", + "hpa_sec"}; +static const size_t num_arena_mutexes = sizeof(arena_mutex_names) + / sizeof(arena_mutex_names[0]); + +TEST_BEGIN(test_json_stats_mutexes) { + test_skip_if(!config_stats); + + uint64_t epoch; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + /* "J" for JSON format, "a" to omit per-arena stats. */ + malloc_stats_print(stats_buf_write_cb, &sbuf, "Ja"); + + /* Verify global mutexes under stats.mutexes. */ + const char *global_mutexes_section = json_find_section( + sbuf.buf, "mutexes"); + expect_ptr_not_null(global_mutexes_section, + "Could not find global mutexes section in JSON output"); + + for (size_t i = 0; i < num_global_mutexes; i++) { + verify_mutex_json(global_mutexes_section, "stats.mutexes", + global_mutex_names[i]); + } + + /* Verify arena mutexes under stats.arenas.merged.mutexes. */ + const char *arenas_section = json_find_section( + sbuf.buf, "stats.arenas"); + expect_ptr_not_null(arenas_section, + "Could not find stats.arenas section in JSON output"); + + const char *merged_section = json_find_section( + arenas_section, "merged"); + expect_ptr_not_null( + merged_section, "Could not find merged section in JSON output"); + + const char *arena_mutexes_section = json_find_section( + merged_section, "mutexes"); + expect_ptr_not_null(arena_mutexes_section, + "Could not find arena mutexes section in JSON output"); + + for (size_t i = 0; i < num_arena_mutexes; i++) { + /* + * MALLCTL_ARENAS_ALL is 4096 representing all arenas in + * mallctl queries. 
+ */ + verify_mutex_json(arena_mutexes_section, + "stats.arenas.4096.mutexes", arena_mutex_names[i]); + } + + stats_buf_fini(&sbuf); +} +TEST_END + +int +main(void) { + return test(test_json_stats_mutexes); +} From 1cc563f531ae26ffa17f7afb3568cf773d80550a Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 23 Feb 2026 23:31:12 -0800 Subject: [PATCH 363/395] Move bin functions from arena.c to bin.c This is a clean-up change that gives the bin functions implemented in the area code a prefix of bin_ and moves them into the bin code. To further decouple the bin code from the arena code, bin functions that had taken an arena_t to check arena_is_auto now take an is_auto parameter instead. --- include/jemalloc/internal/arena_externs.h | 6 - include/jemalloc/internal/arena_inlines_b.h | 8 +- include/jemalloc/internal/bin.h | 38 +++ src/arena.c | 308 ++------------------ src/bin.c | 263 +++++++++++++++++ src/large.c | 4 +- src/tcache.c | 4 +- 7 files changed, 326 insertions(+), 305 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1d004635..39794b3e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -79,10 +79,6 @@ void arena_dalloc_promoted( tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -void arena_dalloc_bin_locked_handle_newly_empty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); -void arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr, unsigned nflush, bool small, @@ -111,8 +107,6 @@ void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); 
arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose( - tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6276deaa..a0caf586 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -609,12 +609,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { - arena_dalloc_bin_locked_handle_newly_empty( - tsdn, arena, slab, bin); + bin_dalloc_locked_handle_newly_empty( + tsdn, arena_is_auto(arena), slab, bin); return true; } else if (nfree == 1 && slab != bin->slabcur) { - arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn, arena, slab, bin); + bin_dalloc_locked_handle_newly_nonempty( + tsdn, arena_is_auto(arena), slab, bin); } return false; } diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 05a2f845..51d4c89e 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" @@ -61,6 +62,43 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin); void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +/* Slab region allocation. */ +void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info); +void bin_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs); + +/* Slab list management. 
*/ +void bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab); +void bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab); +edata_t *bin_slabs_nonfull_tryget(bin_t *bin); +void bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab); +void bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab); + +/* Slab association / demotion. */ +void bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin); +void bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); + +/* Deallocation helpers (called under bin lock). */ +void bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin); +void bin_dalloc_locked_handle_newly_empty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); +void bin_dalloc_locked_handle_newly_nonempty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); + +/* Slabcur refill and allocation. */ +void bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab); +void *bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab); +bool bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto, + bin_t *bin); +void *bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin, + szind_t binind); + +/* Bin selection. */ +bin_t *bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p); + /* Stats. 
*/ static inline void bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { diff --git a/src/arena.c b/src/arena.c index 5b144c63..338cc330 100644 --- a/src/arena.c +++ b/src/arena.c @@ -66,8 +66,6 @@ const arena_config_t arena_config_default = { static bool arena_decay_dirty( tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_bin_lower_slab( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); static void arena_maybe_do_deferred_work( tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); @@ -241,71 +239,6 @@ arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { arena_background_thread_inactivity_check(tsdn, arena, false); } -static void * -arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { - void *ret; - slab_data_t *slab_data = edata_slab_data_get(slab); - size_t regind; - - assert(edata_nfree_get(slab) > 0); - assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - - regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((byte_t *)edata_addr_get(slab) - + (uintptr_t)(bin_info->reg_size * regind)); - edata_nfree_dec(slab); - return ret; -} - -static void -arena_slab_reg_alloc_batch( - edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) >= cnt); - assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - -#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) - for (unsigned i = 0; i < cnt; i++) { - size_t regind = bitmap_sfu( - slab_data->bitmap, &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) - + (uintptr_t)(bin_info->reg_size * regind)); - } -#else - unsigned group = 0; - bitmap_t g = slab_data->bitmap[group]; - unsigned i = 0; - while (i < cnt) { - while (g == 0) { - g = slab_data->bitmap[++group]; - } - size_t shift = group << LG_BITMAP_GROUP_NBITS; - size_t pop 
= popcount_lu(g); - if (pop > (cnt - i)) { - pop = cnt - i; - } - - /* - * Load from memory locations only once, outside the - * hot loop below. - */ - uintptr_t base = (uintptr_t)edata_addr_get(slab); - uintptr_t regsize = (uintptr_t)bin_info->reg_size; - while (pop--) { - size_t bit = cfs_lu(&g); - size_t regind = shift + bit; - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ - *(ptrs + i) = (void *)(base + regsize * regind); - - i++; - } - slab_data->bitmap[group] = g; - } -#endif - edata_nfree_sub(slab, cnt); -} - static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); @@ -622,58 +555,6 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { } } -static void -arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { - assert(edata_nfree_get(slab) > 0); - edata_heap_insert(&bin->slabs_nonfull, slab); - if (config_stats) { - bin->stats.nonfull_slabs++; - } -} - -static void -arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { - edata_heap_remove(&bin->slabs_nonfull, slab); - if (config_stats) { - bin->stats.nonfull_slabs--; - } -} - -static edata_t * -arena_bin_slabs_nonfull_tryget(bin_t *bin) { - edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); - if (slab == NULL) { - return NULL; - } - if (config_stats) { - bin->stats.reslabs++; - bin->stats.nonfull_slabs--; - } - return slab; -} - -static void -arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { - assert(edata_nfree_get(slab) == 0); - /* - * Tracking extents is required by arena_reset, which is not allowed - * for auto arenas. Bypass this step to avoid touching the edata - * linkage (often results in cache misses) for auto arenas. 
- */ - if (arena_is_auto(arena)) { - return; - } - edata_list_active_append(&bin->slabs_full, slab); -} - -static void -arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { - if (arena_is_auto(arena)) { - return; - } - edata_list_active_remove(&bin->slabs_full, slab); -} - static void arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { edata_t *slab; @@ -694,7 +575,7 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { } for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; slab = edata_list_active_first(&bin->slabs_full)) { - arena_bin_slabs_full_remove(arena, bin, slab); + bin_slabs_full_remove(false, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -985,73 +866,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, return slab; } -/* - * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab - * variants (i.e. through slabcur and nonfull) must be tried first. - */ -static void -arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, - bin_t *bin, szind_t binind, edata_t *fresh_slab) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - /* Only called after slabcur and nonfull both failed. 
*/ - assert(bin->slabcur == NULL); - assert(edata_heap_first(&bin->slabs_nonfull) == NULL); - assert(fresh_slab != NULL); - - /* A new slab from arena_slab_alloc() */ - assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); - if (config_stats) { - bin->stats.nslabs++; - bin->stats.curslabs++; - } - bin->slabcur = fresh_slab; -} - -/* Refill slabcur and then alloc using the fresh slab */ -static void * -arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *fresh_slab) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - arena_bin_refill_slabcur_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); - - return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); -} - -static bool -arena_bin_refill_slabcur_no_fresh_slab( - tsdn_t *tsdn, arena_t *arena, bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - /* Only called after arena_slab_reg_alloc[_batch] failed. */ - assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); - - if (bin->slabcur != NULL) { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - } - - /* Look for a usable slab. 
*/ - bin->slabcur = arena_bin_slabs_nonfull_tryget(bin); - assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); - - return (bin->slabcur == NULL); -} - -bin_t * -arena_bin_choose( - tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard_p) { - unsigned binshard; - if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { - binshard = 0; - } else { - binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; - } - assert(binshard < bin_infos[binind].n_shards); - if (binshard_p != NULL) { - *binshard_p = binshard; - } - return arena_get_bin(arena, binind, binshard); -} - cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, @@ -1088,9 +902,10 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool made_progress = true; edata_t *fresh_slab = NULL; bool alloc_and_retry = false; + bool is_auto = arena_is_auto(arena); cache_bin_sz_t filled = 0; unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); label_refill: malloc_mutex_lock(tsdn, &bin->lock); @@ -1109,22 +924,22 @@ label_refill: cnt = nfill_min - filled; } - arena_slab_reg_alloc_batch( + bin_slab_reg_alloc_batch( slabcur, bin_info, cnt, &arr->ptr[filled]); made_progress = true; filled += cnt; continue; } /* Next try refilling slabcur from nonfull slabs. */ - if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + if (!bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) { assert(bin->slabcur != NULL); continue; } /* Then see if a new slab was reserved already. 
*/ if (fresh_slab != NULL) { - arena_bin_refill_slabcur_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); + bin_refill_slabcur_with_fresh_slab( + tsdn, bin, binind, fresh_slab); assert(bin->slabcur != NULL); fresh_slab = NULL; continue; @@ -1193,7 +1008,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, const bool manual_arena = !arena_is_auto(arena); unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); size_t nslab = 0; size_t filled = 0; @@ -1212,7 +1027,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, batch = nregs; } assert(batch > 0); - arena_slab_reg_alloc_batch( + bin_slab_reg_alloc_batch( slab, bin_info, (unsigned)batch, &ptrs[filled]); assert(edata_addr_get(slab) == ptrs[filled]); if (zero) { @@ -1233,7 +1048,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, * iff slab != NULL. */ if (slab != NULL) { - arena_bin_lower_slab(tsdn, arena, slab, bin); + bin_lower_slab(tsdn, !manual_arena, slab, bin); } if (manual_arena) { edata_list_active_concat(&bin->slabs_full, &fulls); @@ -1252,35 +1067,18 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, return filled; } -/* - * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill - * bin->slabcur if necessary. 
- */ -static void * -arena_bin_malloc_no_fresh_slab( - tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { - if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { - return NULL; - } - } - - assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); -} - static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; size_t usize = sz_index2size(binind); + bool is_auto = arena_is_auto(arena); unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; - void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + void *ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ @@ -1289,15 +1087,15 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { /********************************/ malloc_mutex_lock(tsdn, &bin->lock); /* Retry since the lock was dropped. */ - ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind); if (ret == NULL) { if (fresh_slab == NULL) { /* OOM */ malloc_mutex_unlock(tsdn, &bin->lock); return NULL; } - ret = arena_bin_malloc_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); + ret = bin_malloc_with_fresh_slab( + tsdn, bin, binind, fresh_slab); fresh_slab = NULL; } } @@ -1366,78 +1164,6 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } } -static void -arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { - /* Dissociate slab from bin. 
*/ - if (slab == bin->slabcur) { - bin->slabcur = NULL; - } else { - szind_t binind = edata_szind_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - - /* - * The following block's conditional is necessary because if the - * slab only contains one region, then it never gets inserted - * into the non-full slabs heap. - */ - if (bin_info->nregs == 1) { - arena_bin_slabs_full_remove(arena, bin, slab); - } else { - arena_bin_slabs_nonfull_remove(bin, slab); - } - } -} - -static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - assert(edata_nfree_get(slab) > 0); - - /* - * Make sure that if bin->slabcur is non-NULL, it refers to the - * oldest/lowest non-full slab. It is okay to NULL slabcur out rather - * than proactively keeping it pointing at the oldest/lowest non-full - * slab. - */ - if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { - /* Switch slabcur. */ - if (edata_nfree_get(bin->slabcur) > 0) { - arena_bin_slabs_nonfull_insert(bin, bin->slabcur); - } else { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - } - bin->slabcur = slab; - if (config_stats) { - bin->stats.reslabs++; - } - } else { - arena_bin_slabs_nonfull_insert(bin, slab); - } -} - -static void -arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - - assert(slab != bin->slabcur); - if (config_stats) { - bin->stats.curslabs--; - } -} - -void -arena_dalloc_bin_locked_handle_newly_empty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab_prepare(tsdn, slab, bin); -} - -void -arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - arena_bin_slabs_full_remove(arena, bin, slab); - arena_bin_lower_slab(tsdn, arena, slab, bin); -} - static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t 
binind = edata_szind_get(edata); @@ -1637,7 +1363,7 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, * thread's arena, so the stats didn't get merged. * Manually do so now. */ - bin_t *bin = arena_bin_choose(tsdn, stats_arena, binind, NULL); + bin_t *bin = bin_choose(tsdn, stats_arena, binind, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += (*merge_stats)->nrequests; diff --git a/src/bin.c b/src/bin.c index a11b108e..6bab4b22 100644 --- a/src/bin.c +++ b/src/bin.c @@ -67,3 +67,266 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); } + +void * +bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { + void *ret; + slab_data_t *slab_data = edata_slab_data_get(slab); + size_t regind; + + assert(edata_nfree_get(slab) > 0); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + + regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); + ret = (void *)((byte_t *)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + edata_nfree_dec(slab); + return ret; +} + +void +bin_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) >= cnt); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + +#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) + for (unsigned i = 0; i < cnt; i++) { + size_t regind = bitmap_sfu( + slab_data->bitmap, &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + } +#else + unsigned group = 0; + bitmap_t g = slab_data->bitmap[group]; + unsigned i = 0; + while (i < cnt) { + while (g == 0) { + g = slab_data->bitmap[++group]; + } + size_t shift = group << LG_BITMAP_GROUP_NBITS; + size_t pop = popcount_lu(g); + if (pop > (cnt - i)) { + pop = cnt - i; + } + + /* + * Load 
from memory locations only once, outside the + * hot loop below. + */ + uintptr_t base = (uintptr_t)edata_addr_get(slab); + uintptr_t regsize = (uintptr_t)bin_info->reg_size; + while (pop--) { + size_t bit = cfs_lu(&g); + size_t regind = shift + bit; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + *(ptrs + i) = (void *)(base + regsize * regind); + + i++; + } + slab_data->bitmap[group] = g; + } +#endif + edata_nfree_sub(slab, cnt); +} + +void +bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) > 0); + edata_heap_insert(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs++; + } +} + +void +bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { + edata_heap_remove(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs--; + } +} + +edata_t * +bin_slabs_nonfull_tryget(bin_t *bin) { + edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); + if (slab == NULL) { + return NULL; + } + if (config_stats) { + bin->stats.reslabs++; + bin->stats.nonfull_slabs--; + } + return slab; +} + +void +bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) == 0); + /* + * Tracking extents is required by arena_reset, which is not allowed + * for auto arenas. Bypass this step to avoid touching the edata + * linkage (often results in cache misses) for auto arenas. + */ + if (is_auto) { + return; + } + edata_list_active_append(&bin->slabs_full, slab); +} + +void +bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab) { + if (is_auto) { + return; + } + edata_list_active_remove(&bin->slabs_full, slab); +} + +void +bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin) { + /* Dissociate slab from bin. 
*/ + if (slab == bin->slabcur) { + bin->slabcur = NULL; + } else { + szind_t binind = edata_szind_get(slab); + const bin_info_t *bin_info = &bin_infos[binind]; + + /* + * The following block's conditional is necessary because if the + * slab only contains one region, then it never gets inserted + * into the non-full slabs heap. + */ + if (bin_info->nregs == 1) { + bin_slabs_full_remove(is_auto, bin, slab); + } else { + bin_slabs_nonfull_remove(bin, slab); + } + } +} + +void +bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + assert(edata_nfree_get(slab) > 0); + + /* + * Make sure that if bin->slabcur is non-NULL, it refers to the + * oldest/lowest non-full slab. It is okay to NULL slabcur out rather + * than proactively keeping it pointing at the oldest/lowest non-full + * slab. + */ + if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { + /* Switch slabcur. */ + if (edata_nfree_get(bin->slabcur) > 0) { + bin_slabs_nonfull_insert(bin, bin->slabcur); + } else { + bin_slabs_full_insert(is_auto, bin, bin->slabcur); + } + bin->slabcur = slab; + if (config_stats) { + bin->stats.reslabs++; + } + } else { + bin_slabs_nonfull_insert(bin, slab); + } +} + +void +bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + + assert(slab != bin->slabcur); + if (config_stats) { + bin->stats.curslabs--; + } +} + +void +bin_dalloc_locked_handle_newly_empty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + bin_dissociate_slab(is_auto, slab, bin); + bin_dalloc_slab_prepare(tsdn, slab, bin); +} + +void +bin_dalloc_locked_handle_newly_nonempty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + bin_slabs_full_remove(is_auto, bin, slab); + bin_lower_slab(tsdn, is_auto, slab, bin); +} + +void +bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after slabcur 
and nonfull both failed. */ + assert(bin->slabcur == NULL); + assert(edata_heap_first(&bin->slabs_nonfull) == NULL); + assert(fresh_slab != NULL); + + /* A new slab from arena_slab_alloc() */ + assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); + if (config_stats) { + bin->stats.nslabs++; + bin->stats.curslabs++; + } + bin->slabcur = fresh_slab; +} + +void * +bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab); + + return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +bool +bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after bin_slab_reg_alloc[_batch] failed. */ + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); + + if (bin->slabcur != NULL) { + bin_slabs_full_insert(is_auto, bin, bin->slabcur); + } + + /* Look for a usable slab. 
*/ + bin->slabcur = bin_slabs_nonfull_tryget(bin); + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); + + return (bin->slabcur == NULL); +} + +void * +bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin, + szind_t binind) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { + if (bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) { + return NULL; + } + } + + assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); + return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +bin_t * +bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p) { + unsigned binshard; + if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { + binshard = 0; + } else { + binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + } + assert(binshard < bin_infos[binind].n_shards); + if (binshard_p != NULL) { + *binshard_p = binshard; + } + return arena_get_bin(arena, binind, binshard); +} diff --git a/src/large.c b/src/large.c index 7cae61ae..087df99d 100644 --- a/src/large.c +++ b/src/large.c @@ -41,7 +41,7 @@ large_palloc( return NULL; } - /* See comments in arena_bin_slabs_full_insert(). */ + /* See comments in bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { /* Insert edata into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); @@ -233,7 +233,7 @@ static void large_dalloc_prep_impl( tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool locked) { if (!locked) { - /* See comments in arena_bin_slabs_full_insert(). */ + /* See comments in bin_slabs_full_insert(). 
*/ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); edata_list_active_remove(&arena->large, edata); diff --git a/src/tcache.c b/src/tcache.c index 74ff4718..172d9320 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -218,7 +218,7 @@ tcache_gc_small_heuristic_addr_get( tsd_t *tsd, tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); tsdn_t *tsdn = tsd_tsdn(tsd); - bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + bin_t *bin = bin_choose(tsdn, tcache_slow->arena, szind, NULL); assert(bin != NULL); malloc_mutex_lock(tsdn, &bin->lock); @@ -1275,7 +1275,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { continue; } if (i < SC_NBINS) { - bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); + bin_t *bin = bin_choose(tsdn, arena, i, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += cache_bin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); From 0ac9380cf1b2fe1b255a96c5d57d6eab33a78330 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 23 Feb 2026 23:31:27 -0800 Subject: [PATCH 364/395] Move bin inline functions from arena_inlines_b.h to bin_inlines.h This is a continuation of my previous clean-up change, now focusing on the inline functions defined in header files. 
--- include/jemalloc/internal/arena_inlines_b.h | 106 +----------------- include/jemalloc/internal/bin_inlines.h | 112 ++++++++++++++++++++ src/arena.c | 21 ++-- test/unit/slab.c | 10 +- 4 files changed, 131 insertions(+), 118 deletions(-) create mode 100644 include/jemalloc/internal/bin_inlines.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a0caf586..bda256b9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/bin_inlines.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" @@ -335,29 +336,6 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } } -/* Find the region index of a pointer. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind_impl( - div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); - return regind; -} - -/* Checks whether ptr is currently active in the arena. 
*/ JEMALLOC_ALWAYS_INLINE bool arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { if (!config_debug) { @@ -367,10 +345,10 @@ arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { szind_t binind = edata_szind_get(edata); div_info_t div_info = arena_binind_div_info[binind]; /* - * Calls the internal function arena_slab_regind_impl because the + * Calls the internal function bin_slab_regind_impl because the * safety check does not require a lock. */ - size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); + size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); slab_data_t *slab_data = edata_slab_data_get(edata); const bin_info_t *bin_info = &bin_infos[binind]; assert(edata_nfree_get(edata) < bin_info->nregs); @@ -551,84 +529,6 @@ arena_cache_oblivious_randomize( } } -/* - * The dalloc bin info contains just the information that the common paths need - * during tcache flushes. By force-inlining these paths, and using local copies - * of data (so that the compiler knows it's constant), we avoid a whole bunch of - * redundant loads and stores by leaving this information in registers. - */ -typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; -struct arena_dalloc_bin_locked_info_s { - div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; -}; - -JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, - edata_t *slab, const void *ptr) { - size_t regind = arena_slab_regind_impl( - &info->div_info, binind, slab, ptr); - return regind; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_begin( - arena_dalloc_bin_locked_info_t *info, szind_t binind) { - info->div_info = arena_binind_div_info[binind]; - info->nregs = bin_infos[binind].nregs; - info->ndalloc = 0; -} - -/* - * Does the deallocation work associated with freeing a single pointer (a - * "step") in between a arena_dalloc_bin_locked begin and end call. 
- * - * Returns true if arena_slab_dalloc must be called on slab. Doesn't do - * stats updates, which happen during finish (this lets running counts get left - * in a register). - */ -JEMALLOC_ALWAYS_INLINE bool -arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); - - if (config_stats) { - info->ndalloc++; - } - - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - bin_dalloc_locked_handle_newly_empty( - tsdn, arena_is_auto(arena), slab, bin); - return true; - } else if (nfree == 1 && slab != bin->slabcur) { - bin_dalloc_locked_handle_newly_nonempty( - tsdn, arena_is_auto(arena), slab, bin); - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *info) { - if (config_stats) { - bin->stats.ndalloc += info->ndalloc; - assert(bin->stats.curregs >= (size_t)info->ndalloc); - bin->stats.curregs -= (size_t)info->ndalloc; - } -} - static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h new file mode 100644 index 00000000..f4291169 --- /dev/null +++ b/include/jemalloc/internal/bin_inlines.h @@ -0,0 +1,112 @@ +#ifndef JEMALLOC_INTERNAL_BIN_INLINES_H +#define JEMALLOC_INTERNAL_BIN_INLINES_H + +#include 
"jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/div.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/sc.h" + +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; +struct bin_dalloc_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +/* Find the region index of a pointer within a slab. */ +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind_impl( + div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. 
*/ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind(bin_dalloc_locked_info_t *info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t regind = bin_slab_regind_impl( + &info->div_info, binind, slab, ptr); + return regind; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a bin_dalloc_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, + bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = bin_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + bin_dalloc_locked_handle_newly_empty( + tsdn, is_auto, slab, bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + bin_dalloc_locked_handle_newly_nonempty( + tsdn, is_auto, slab, bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, + bin_dalloc_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + +#endif /* JEMALLOC_INTERNAL_BIN_INLINES_H */ diff --git a/src/arena.c b/src/arena.c index 338cc330..d7c8cd1f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1171,11 +1171,11 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_info_t info; - arena_dalloc_bin_locked_begin(&info, binind); - bool ret = arena_dalloc_bin_locked_step( - tsdn, arena, bin, &info, binind, edata, ptr); - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); + bin_dalloc_locked_info_t info; + bin_dalloc_locked_begin(&info, binind); + bool ret = bin_dalloc_locked_step( + tsdn, arena_is_auto(arena), bin, &info, binind, edata, ptr); + bin_dalloc_locked_finish(tsdn, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { @@ -1330,12 +1330,13 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, /* Next flush objects. */ /* Init only to avoid used-uninitialized warning. 
*/ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + bin_dalloc_locked_info_t dalloc_bin_info = {0}; + bin_dalloc_locked_begin(&dalloc_bin_info, binind); for (unsigned i = prev_flush_start; i < flush_start; i++) { void *ptr = arr->ptr[i]; edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + if (bin_dalloc_locked_step(tsdn, + arena_is_auto(cur_arena), cur_bin, &dalloc_bin_info, binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; @@ -1343,8 +1344,8 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, } } - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); + bin_dalloc_locked_finish( + tsdn, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); arena_decay_ticks( diff --git a/test/unit/slab.c b/test/unit/slab.c index 5c48e762..d98663e8 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -2,7 +2,7 @@ #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) -TEST_BEGIN(test_arena_slab_regind) { +TEST_BEGIN(test_bin_slab_regind) { szind_t binind; for (binind = 0; binind < SC_NBINS; binind++) { @@ -15,13 +15,13 @@ TEST_BEGIN(test_arena_slab_regind) { false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_ptr_not_null( edata_addr_get(&slab), "Unexpected malloc() failure"); - arena_dalloc_bin_locked_info_t dalloc_info; - arena_dalloc_bin_locked_begin(&dalloc_info, binind); + bin_dalloc_locked_info_t dalloc_info; + bin_dalloc_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); expect_zu_eq( - arena_slab_regind(&dalloc_info, binind, &slab, reg), + bin_slab_regind(&dalloc_info, binind, &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); @@ -33,5 +33,5 @@ TEST_END int main(void) { - return test(test_arena_slab_regind); + return 
test(test_bin_slab_regind); } From a75655badf31a2c6187bf069f8103c626542941f Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Fri, 27 Feb 2026 12:02:07 -0800 Subject: [PATCH 365/395] Add unit test coverage for bin interfaces --- Makefile.in | 1 + test/unit/bin.c | 825 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 826 insertions(+) create mode 100644 test/unit/bin.c diff --git a/Makefile.in b/Makefile.in index f916ad71..463693df 100644 --- a/Makefile.in +++ b/Makefile.in @@ -208,6 +208,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_init.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ + $(srcroot)test/unit/bin.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/test/unit/bin.c b/test/unit/bin.c new file mode 100644 index 00000000..002bbf11 --- /dev/null +++ b/test/unit/bin.c @@ -0,0 +1,825 @@ +#include "test/jemalloc_test.h" + +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + +/* Create a page-aligned mock slab with all regions free. */ +static void +create_mock_slab(edata_t *slab, szind_t binind, uint64_t sn) { + const bin_info_t *bin_info = &bin_infos[binind]; + void *addr; + slab_data_t *slab_data; + + addr = mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)); + assert_ptr_not_null(addr, "Unexpected mallocx failure"); + + memset(slab, 0, sizeof(edata_t)); + edata_init(slab, INVALID_ARENA_IND, addr, bin_info->slab_size, + true, binind, sn, extent_state_active, false, true, + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + edata_nfree_set(slab, bin_info->nregs); + + /* Initialize bitmap to all regions free. */ + slab_data = edata_slab_data_get(slab); + bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); +} + +/* + * Test that bin_init produces a valid empty bin. 
+ */ +TEST_BEGIN(test_bin_init) { + bin_t bin; + bool err; + + err = bin_init(&bin); + expect_false(err, "bin_init should succeed"); + expect_ptr_null(bin.slabcur, "New bin should have NULL slabcur"); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "New bin should have empty nonfull heap"); + expect_true(edata_list_active_empty(&bin.slabs_full), + "New bin should have empty full list"); + if (config_stats) { + expect_u64_eq(bin.stats.nmalloc, 0, + "New bin should have zero nmalloc"); + expect_u64_eq(bin.stats.ndalloc, 0, + "New bin should have zero ndalloc"); + expect_zu_eq(bin.stats.curregs, 0, + "New bin should have zero curregs"); + expect_zu_eq(bin.stats.curslabs, 0, + "New bin should have zero curslabs"); + } +} +TEST_END + +/* + * Test single-region allocation from a slab. + */ +TEST_BEGIN(test_bin_slab_reg_alloc) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + unsigned i; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + + for (i = 0; i < nregs; i++) { + void *reg; + + expect_u_gt(edata_nfree_get(&slab), 0, + "Slab should have free regions"); + reg = bin_slab_reg_alloc(&slab, bin_info); + expect_ptr_not_null(reg, + "bin_slab_reg_alloc should return non-NULL"); + /* Verify the pointer is within the slab. */ + expect_true( + (uintptr_t)reg >= (uintptr_t)edata_addr_get(&slab) && + (uintptr_t)reg < (uintptr_t)edata_addr_get(&slab) + + bin_info->slab_size, + "Allocated region should be within slab bounds"); + } + expect_u_eq(edata_nfree_get(&slab), 0, + "Slab should be full after allocating all regions"); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test batch allocation from a slab. 
+ */ +TEST_BEGIN(test_bin_slab_reg_alloc_batch) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + void **ptrs; + unsigned i; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + bin_slab_reg_alloc_batch(&slab, bin_info, nregs, ptrs); + expect_u_eq(edata_nfree_get(&slab), 0, + "Slab should be full after batch alloc of all regions"); + + /* Verify all pointers are within the slab and distinct. */ + for (i = 0; i < nregs; i++) { + unsigned j; + + expect_ptr_not_null(ptrs[i], "Batch pointer should be non-NULL"); + expect_true( + (uintptr_t)ptrs[i] >= (uintptr_t)edata_addr_get(&slab) && + (uintptr_t)ptrs[i] < (uintptr_t)edata_addr_get(&slab) + + bin_info->slab_size, + "Batch pointer should be within slab bounds"); + for (j = 0; j < i; j++) { + expect_ptr_ne(ptrs[i], ptrs[j], + "Batch pointers should be distinct"); + } + } + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test partial batch allocation from a slab. + */ +TEST_BEGIN(test_bin_slab_reg_alloc_batch_partial) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + unsigned half; + void **ptrs; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + + /* Only allocate half. */ + half = nregs / 2; + if (half == 0) { + half = 1; + } + ptrs = mallocx(half * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + bin_slab_reg_alloc_batch(&slab, bin_info, half, ptrs); + expect_u_eq(edata_nfree_get(&slab), nregs - half, + "Slab nfree should reflect partial batch alloc"); + + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test nonfull slab list insert, remove, and tryget. 
+ */ +TEST_BEGIN(test_bin_slabs_nonfull) { + bin_t bin; + szind_t binind = 0; + edata_t slab1, slab2; + edata_t *got; + edata_t *remaining; + + bin_init(&bin); + + /* Create two non-full slabs with different serial numbers. */ + create_mock_slab(&slab1, binind, 1); + create_mock_slab(&slab2, binind, 2); + + /* Insert both into the nonfull heap. */ + bin_slabs_nonfull_insert(&bin, &slab1); + expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be non-empty after insert"); + + bin_slabs_nonfull_insert(&bin, &slab2); + + /* tryget should return a slab. */ + got = bin_slabs_nonfull_tryget(&bin); + expect_ptr_not_null(got, "tryget should return a slab"); + + /* Remove the remaining one explicitly. */ + remaining = edata_heap_first(&bin.slabs_nonfull); + expect_ptr_not_null(remaining, "One slab should still remain"); + bin_slabs_nonfull_remove(&bin, remaining); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be empty after removing both slabs"); + + free(edata_addr_get(&slab1)); + free(edata_addr_get(&slab2)); +} +TEST_END + +/* + * Test full slab list insert and remove (non-auto arena case). + */ +TEST_BEGIN(test_bin_slabs_full) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned i; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* Consume all regions so the slab appears full. */ + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&slab, bin_info); + } + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Insert into full list (is_auto=false to actually track). */ + bin_slabs_full_insert(false, &bin, &slab); + expect_false(edata_list_active_empty(&bin.slabs_full), + "Full list should be non-empty after insert"); + + /* Remove from full list. 
*/ + bin_slabs_full_remove(false, &bin, &slab); + expect_true(edata_list_active_empty(&bin.slabs_full), + "Full list should be empty after remove"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that full slab insert/remove is a no-op for auto arenas. + */ +TEST_BEGIN(test_bin_slabs_full_auto) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned i; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&slab, bin_info); + } + + /* is_auto=true: insert should be a no-op. */ + bin_slabs_full_insert(true, &bin, &slab); + expect_true(edata_list_active_empty(&bin.slabs_full), + "Full list should remain empty for auto arenas"); + + /* Remove should also be a no-op without crashing. */ + bin_slabs_full_remove(true, &bin, &slab); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test dissociate_slab when the slab is slabcur. + */ +TEST_BEGIN(test_bin_dissociate_slabcur) { + bin_t bin; + szind_t binind = 0; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + bin.slabcur = &slab; + bin_dissociate_slab(true, &slab, &bin); + expect_ptr_null(bin.slabcur, + "Dissociating slabcur should NULL it out"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test dissociate_slab when the slab is in the nonfull heap. + */ +TEST_BEGIN(test_bin_dissociate_nonfull) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* + * Only dissociate from nonfull when nregs > 1. For nregs == 1, + * the slab goes directly to the full list, never nonfull. 
+ */ + test_skip_if(bin_info->nregs == 1); + + bin_slabs_nonfull_insert(&bin, &slab); + bin_dissociate_slab(true, &slab, &bin); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be empty after dissociating the slab"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test refill slabcur with a fresh slab. + */ +TEST_BEGIN(test_bin_refill_slabcur_with_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t fresh; + + bin_init(&bin); + create_mock_slab(&fresh, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + bin_refill_slabcur_with_fresh_slab(tsdn, &bin, binind, &fresh); + expect_ptr_eq(bin.slabcur, &fresh, + "Fresh slab should become slabcur"); + if (config_stats) { + expect_u64_eq(bin.stats.nslabs, 1, + "nslabs should be 1 after installing fresh slab"); + expect_zu_eq(bin.stats.curslabs, 1, + "curslabs should be 1 after installing fresh slab"); + } + expect_u_eq(edata_nfree_get(bin.slabcur), bin_info->nregs, + "Fresh slab should have all regions free"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&fresh)); +} +TEST_END + +/* + * Test refill slabcur without a fresh slab (from the nonfull heap). + */ +TEST_BEGIN(test_bin_refill_slabcur_no_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab; + bool empty; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* With no slabcur and empty nonfull heap, refill should fail. */ + empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin); + expect_true(empty, + "Refill should fail when nonfull heap is empty"); + expect_ptr_null(bin.slabcur, "slabcur should remain NULL"); + + /* Insert a slab into nonfull, then refill should succeed. 
*/ + bin_slabs_nonfull_insert(&bin, &slab); + empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin); + expect_false(empty, + "Refill should succeed when nonfull heap has a slab"); + expect_ptr_eq(bin.slabcur, &slab, + "slabcur should be the slab from nonfull heap"); + + malloc_mutex_unlock(tsdn, &bin.lock); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that refill moves a full slabcur into the full list. + */ +TEST_BEGIN(test_bin_refill_slabcur_full_to_list) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t full_slab, nonfull_slab; + unsigned i; + bool empty; + + bin_init(&bin); + create_mock_slab(&full_slab, binind, 0); + create_mock_slab(&nonfull_slab, binind, 1); + + /* Make full_slab actually full. */ + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&full_slab, bin_info); + } + + malloc_mutex_lock(tsdn, &bin.lock); + bin.slabcur = &full_slab; + bin_slabs_nonfull_insert(&bin, &nonfull_slab); + + /* Refill should move the full slabcur to full list and pick nonfull. */ + empty = bin_refill_slabcur_no_fresh_slab(tsdn, false, &bin); + expect_false(empty, "Refill should succeed"); + expect_ptr_eq(bin.slabcur, &nonfull_slab, + "slabcur should now be the nonfull slab"); + expect_false(edata_list_active_empty(&bin.slabs_full), + "Old full slabcur should be in the full list"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&full_slab)); + free(edata_addr_get(&nonfull_slab)); +} +TEST_END + +/* + * Test malloc with a fresh slab. 
+ */ +TEST_BEGIN(test_bin_malloc_with_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t fresh; + void *ptr; + + bin_init(&bin); + create_mock_slab(&fresh, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + ptr = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &fresh); + expect_ptr_not_null(ptr, "Should allocate from fresh slab"); + expect_ptr_eq(bin.slabcur, &fresh, + "Fresh slab should be installed as slabcur"); + expect_u_eq(edata_nfree_get(&fresh), bin_info->nregs - 1, + "One region should be consumed from fresh slab"); + if (config_stats) { + expect_u64_eq(bin.stats.nslabs, 1, "nslabs should be 1"); + expect_zu_eq(bin.stats.curslabs, 1, "curslabs should be 1"); + } + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&fresh)); +} +TEST_END + +/* + * Test malloc without a fresh slab (from existing slabcur). + */ +TEST_BEGIN(test_bin_malloc_no_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + void *ptr; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* With no slabcur and empty nonfull, should return NULL. */ + ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_null(ptr, + "Should return NULL when no slabs available"); + + /* Set up a slabcur; malloc should succeed. */ + bin.slabcur = &slab; + ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_not_null(ptr, + "Should allocate from slabcur"); + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1, + "One region should be consumed"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test the bin_dalloc_locked begin/step/finish sequence. 
+ */ +TEST_BEGIN(test_bin_dalloc_locked) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + void **ptrs; + unsigned i; + bin_dalloc_locked_info_t info; + bool slab_empty; + bool found_empty; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* Allocate all regions from the slab. */ + nregs = bin_info->nregs; + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + for (i = 0; i < nregs; i++) { + ptrs[i] = bin_slab_reg_alloc(&slab, bin_info); + assert_ptr_not_null(ptrs[i], "Alloc should succeed"); + } + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Set this slab as slabcur so dalloc steps work correctly. */ + bin.slabcur = &slab; + if (config_stats) { + bin.stats.nmalloc = nregs; + bin.stats.curregs = nregs; + bin.stats.nslabs = 1; + bin.stats.curslabs = 1; + } + + malloc_mutex_lock(tsdn, &bin.lock); + + /* Free one region and verify step returns false (not yet empty). */ + bin_dalloc_locked_begin(&info, binind); + slab_empty = bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[0]); + if (nregs > 1) { + expect_false(slab_empty, + "Slab should not be empty after freeing one region"); + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, nregs - 1, + "curregs should decrement by 1"); + } + + /* Free all remaining regions; the last one should empty the slab. 
*/ + bin_dalloc_locked_begin(&info, binind); + found_empty = false; + for (i = 1; i < nregs; i++) { + slab_empty = bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[i]); + if (slab_empty) { + found_empty = true; + } + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + expect_true(found_empty, + "Freeing all regions should produce an empty slab"); + expect_u_eq(edata_nfree_get(&slab), nregs, + "All regions should be free"); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, 0, + "curregs should be 0 after freeing all"); + } + + malloc_mutex_unlock(tsdn, &bin.lock); + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that bin_lower_slab replaces slabcur when the new slab is older. + */ +TEST_BEGIN(test_bin_lower_slab_replaces_slabcur) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab_old, slab_new; + + bin_init(&bin); + + /* slab_old has sn=0 (older), slab_new has sn=1 (newer). */ + create_mock_slab(&slab_old, binind, 0); + create_mock_slab(&slab_new, binind, 1); + + /* Make slab_new the slabcur. */ + bin.slabcur = &slab_new; + + /* + * bin_lower_slab with the older slab should replace slabcur and move + * slab_new into either nonfull or full. + */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_lower_slab(tsdn, true, &slab_old, &bin); + expect_ptr_eq(bin.slabcur, &slab_old, + "Older slab should replace slabcur"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab_old)); + free(edata_addr_get(&slab_new)); +} +TEST_END + +/* + * Test that bin_lower_slab inserts into the nonfull heap when the new slab + * is newer than slabcur. + */ +TEST_BEGIN(test_bin_lower_slab_inserts_nonfull) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab_old, slab_new; + + bin_init(&bin); + create_mock_slab(&slab_old, binind, 0); + create_mock_slab(&slab_new, binind, 1); + + /* Make slab_old the slabcur (older). 
*/ + bin.slabcur = &slab_old; + + /* bin_lower_slab with the newer slab should insert into nonfull. */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_lower_slab(tsdn, true, &slab_new, &bin); + expect_ptr_eq(bin.slabcur, &slab_old, + "Older slabcur should remain"); + expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull), + "Newer slab should be inserted into nonfull heap"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab_old)); + free(edata_addr_get(&slab_new)); +} +TEST_END + +/* + * Test bin_dalloc_slab_prepare updates stats. + */ +TEST_BEGIN(test_bin_dalloc_slab_prepare) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + if (config_stats) { + bin.stats.curslabs = 2; + } + + /* + * bin_dalloc_slab_prepare requires the slab is not slabcur, + * so leave slabcur NULL. + */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_dalloc_slab_prepare(tsdn, &slab, &bin); + if (config_stats) { + expect_zu_eq(bin.stats.curslabs, 1, + "curslabs should decrement"); + } + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test bin_shard_sizes_boot and bin_update_shard_size. + */ +TEST_BEGIN(test_bin_shard_sizes) { + unsigned shard_sizes[SC_NBINS]; + unsigned i; + bool err; + szind_t ind1, ind2; + + /* Boot should set all to the default. */ + bin_shard_sizes_boot(shard_sizes); + for (i = 0; i < SC_NBINS; i++) { + expect_u_eq(shard_sizes[i], N_BIN_SHARDS_DEFAULT, + "Shard sizes should be default after boot"); + } + + /* Update with nshards=0 should fail (returns true). */ + err = bin_update_shard_size(shard_sizes, 1, 1, 0); + expect_true(err, "nshards=0 should be an error"); + + /* Update with nshards > BIN_SHARDS_MAX should fail. */ + err = bin_update_shard_size(shard_sizes, 1, 1, BIN_SHARDS_MAX + 1); + expect_true(err, "nshards > BIN_SHARDS_MAX should be an error"); + + /* Valid update: set a range to 4 shards. 
*/ + err = bin_update_shard_size(shard_sizes, 1, 128, 4); + expect_false(err, "Valid update should succeed"); + /* Verify the range was updated. */ + ind1 = sz_size2index_compute(1); + ind2 = sz_size2index_compute(128); + for (i = ind1; i <= ind2; i++) { + expect_u_eq(shard_sizes[i], 4, + "Updated range should have nshards=4"); + } + + /* Update beyond SC_SMALL_MAXCLASS should be clamped, not fail. */ + err = bin_update_shard_size(shard_sizes, + SC_SMALL_MAXCLASS, SC_SMALL_MAXCLASS * 2, 2); + expect_false(err, + "Update with end beyond SMALL_MAXCLASS should succeed"); +} +TEST_END + +/* + * Test a full alloc-then-free cycle by allocating all regions from a bin + * via bin_malloc_with_fresh_slab, then freeing them all via the + * bin_dalloc_locked sequence. + */ +TEST_BEGIN(test_bin_alloc_free_cycle) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + unsigned nregs = bin_info->nregs; + edata_t slab; + void **ptrs; + unsigned i; + bin_dalloc_locked_info_t info; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* Allocate the first pointer via fresh slab path. */ + ptrs[0] = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &slab); + expect_ptr_not_null(ptrs[0], "First alloc should succeed"); + + /* Allocate the rest from slabcur. */ + for (i = 1; i < nregs; i++) { + ptrs[i] = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_not_null(ptrs[i], "Alloc should succeed"); + } + if (config_stats) { + bin.stats.nmalloc += nregs; + bin.stats.curregs += nregs; + } + + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Free all regions. 
*/ + bin_dalloc_locked_begin(&info, binind); + for (i = 0; i < nregs; i++) { + bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[i]); + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + + expect_u_eq(edata_nfree_get(&slab), nregs, + "All regions should be free after full cycle"); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, 0, + "curregs should be 0 after full cycle"); + } + + malloc_mutex_unlock(tsdn, &bin.lock); + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test alloc/free cycle across multiple bin size classes. + */ +TEST_BEGIN(test_bin_multi_size_class) { + tsdn_t *tsdn = tsdn_fetch(); + szind_t test_indices[] = {0, SC_NBINS / 2, SC_NBINS - 1}; + unsigned nindices = sizeof(test_indices) / sizeof(test_indices[0]); + unsigned t; + + for (t = 0; t < nindices; t++) { + szind_t binind = test_indices[t]; + const bin_info_t *bin_info = &bin_infos[binind]; + bin_t bin; + edata_t slab; + void *ptr; + bin_dalloc_locked_info_t info; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + ptr = bin_malloc_with_fresh_slab( + tsdn, &bin, binind, &slab); + expect_ptr_not_null(ptr, + "Alloc should succeed for binind %u", binind); + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1, + "nfree should be nregs-1 for binind %u", binind); + + /* Free the allocated region. 
*/ + if (config_stats) { + bin.stats.nmalloc = 1; + bin.stats.curregs = 1; + } + bin_dalloc_locked_begin(&info, binind); + bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptr); + bin_dalloc_locked_finish(tsdn, &bin, &info); + + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs, + "All regions should be free for binind %u", binind); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); + } +} +TEST_END + +int +main(void) { + return test( + test_bin_init, + test_bin_slab_reg_alloc, + test_bin_slab_reg_alloc_batch, + test_bin_slab_reg_alloc_batch_partial, + test_bin_slabs_nonfull, + test_bin_slabs_full, + test_bin_slabs_full_auto, + test_bin_dissociate_slabcur, + test_bin_dissociate_nonfull, + test_bin_refill_slabcur_with_fresh_slab, + test_bin_refill_slabcur_no_fresh_slab, + test_bin_refill_slabcur_full_to_list, + test_bin_malloc_with_fresh_slab, + test_bin_malloc_no_fresh_slab, + test_bin_dalloc_locked, + test_bin_lower_slab_replaces_slabcur, + test_bin_lower_slab_inserts_nonfull, + test_bin_dalloc_slab_prepare, + test_bin_shard_sizes, + test_bin_alloc_free_cycle, + test_bin_multi_size_class); +} From a056c20d671e5d001d9d232a7c6d9bb30288e9ef Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 17:15:35 -0800 Subject: [PATCH 366/395] Handle tcache init failures gracefully tsd_tcache_data_init() returns true on failure but its callers ignore this return value, leaving the per-thread tcache in an uninitialized state after a failure. This change disables the tcache on an initialization failure and logs an error message. If opt_abort is true, it will also abort. New unit tests have been added to test tcache initialization failures. 
--- Makefile.in | 1 + include/jemalloc/internal/tcache_externs.h | 5 +- src/tcache.c | 61 +++++++---- test/unit/tcache_init.c | 116 +++++++++++++++++++++ 4 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 test/unit/tcache_init.c diff --git a/Makefile.in b/Makefile.in index 463693df..ec2215b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -292,6 +292,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/sz.c \ + $(srcroot)test/unit/tcache_init.c \ $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 73126db7..b7fdb5a4 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -64,7 +64,7 @@ bool tcache_bin_ncached_max_read( void tcache_arena_reassociate( tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); @@ -80,6 +80,9 @@ void tcache_flush(tsd_t *tsd); bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_enabled_set(tsd_t *tsd, bool enabled); +extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment); + void tcache_assert_initialized(tcache_t *tcache); extern te_base_cb_t tcache_gc_te_handler; diff --git a/src/tcache.c b/src/tcache.c index 172d9320..10fa7c21 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -949,6 +949,21 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { } } +static void * +tcache_stack_alloc_impl(tsdn_t *tsdn, size_t size, size_t alignment) { + if 
(cache_bin_stack_use_thp()) { + /* Alignment is ignored since it comes from THP. */ + assert(alignment == QUANTUM); + return b0_alloc_tcache_stack(tsdn, size); + } + size = sz_sa2u(size, alignment); + return ipallocztm(tsdn, size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); +} + +void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment) = tcache_stack_alloc_impl; + static bool tsd_tcache_data_init_impl( tsd_t *tsd, arena_t *arena, const cache_bin_info_t *tcache_bin_info) { @@ -961,16 +976,7 @@ tsd_tcache_data_init_impl( cache_bin_info_compute_alloc( tcache_bin_info, tcache_nbins, &size, &alignment); - void *mem; - if (cache_bin_stack_use_thp()) { - /* Alignment is ignored since it comes from THP. */ - assert(alignment == QUANTUM); - mem = b0_alloc_tcache_stack(tsd_tsdn(tsd), size); - } else { - size = sz_sa2u(size, alignment); - mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, - true, arena_get(TSDN_NULL, 0, true)); - } + void *mem = tcache_stack_alloc(tsd_tsdn(tsd), size, alignment); if (mem == NULL) { return true; } @@ -1010,7 +1016,20 @@ static bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena, const cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { assert(tcache_bin_info != NULL); - return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); + bool err = tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); + if (unlikely(err)) { + /* + * Disable the tcache before calling malloc_write to + * avoid recursive allocations through libc hooks. + */ + tsd_tcache_enabled_set(tsd, false); + tsd_slow_update(tsd); + malloc_write(": Failed to allocate tcache data\n"); + if (opt_abort) { + abort(); + } + } + return err; } /* Created manual tcache for tcache.create mallctl. */ @@ -1062,8 +1081,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init( - tsd, NULL, tcache_get_default_ncached_max()); + return tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } return false; @@ -1074,8 +1093,10 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init( - tsd, NULL, tcache_get_default_ncached_max()); + if (tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max())) { + return; + } } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -1084,13 +1105,14 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } -void +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); assert(tcache_max == sz_s2u(tcache_max)); tcache_t *tcache = tsd_tcachep_get(tsd); tcache_slow_t *tcache_slow = tcache->tcache_slow; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + bool ret = false; assert(tcache != NULL && tcache_slow != NULL); bool enabled = tcache_available(tsd); @@ -1111,10 +1133,11 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { tcache_max_set(tcache_slow, tcache_max); if (enabled) { - tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); + ret = tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); + return ret; } static bool @@ -1177,9 +1200,7 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); - - return false; + return tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } static void diff --git a/test/unit/tcache_init.c b/test/unit/tcache_init.c new file mode 100644 index 00000000..11d4b654 --- /dev/null +++ b/test/unit/tcache_init.c @@ -0,0 +1,116 @@ +#include "test/jemalloc_test.h" + +static void * 
+tcache_stack_alloc_fail(tsdn_t *tsdn, size_t size, size_t alignment) { + return NULL; +} + +TEST_BEGIN(test_tcache_data_init_oom) { + bool orig_opt_abort = opt_abort; + void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) = + tcache_stack_alloc; + + opt_abort = false; + tcache_stack_alloc = tcache_stack_alloc_fail; + + /* + * Trigger init through tcache_enabled_set by enabling and + * disabling the tcache. + */ + bool e0, e1; + size_t bool_sz = sizeof(bool); + + /* Disable the tcache. */ + e1 = false; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); + + /* Try to enable the tcache. Initialization should fail. */ + e1 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); + + /* The tcache should be disabled. */ + tsd_t *tsd = tsd_fetch(); + expect_false(tsd_tcache_enabled_get(tsd), + "tcache should be disabled after init failure"); + + /* Allocations should go to the arena. */ + void *p = malloc(64); + expect_ptr_not_null(p, "malloc should succeed without tcache"); + free(p); + + /* Restore the original values */ + tcache_stack_alloc = orig_tcache_stack_alloc; + opt_abort = orig_opt_abort; + + /* + * Try to enable the tcache again. This time initialization + * should succeed. + */ + e1 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); +} +TEST_END + +TEST_BEGIN(test_tcache_reinit_oom) { + bool orig_opt_abort = opt_abort; + void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) = + tcache_stack_alloc; + + /* Read current tcache max. 
 */
+	size_t old_tcache_max, sz;
+	sz = sizeof(old_tcache_max);
+	expect_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz,
+	    NULL, 0), 0, "Unexpected mallctl failure");
+
+	opt_abort = false;
+	tcache_stack_alloc = tcache_stack_alloc_fail;
+
+	/*
+	 * Setting thread.tcache.max causes a reinitialization.  With
+	 * the tcache_stack_alloc override, reinitialization should
+	 * fail and disable the tcache.
+	 */
+	size_t new_tcache_max = 1024;
+	new_tcache_max = sz_s2u(new_tcache_max);
+	expect_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	    (void *)&new_tcache_max, sizeof(new_tcache_max)), 0,
+	    "Unexpected mallctl failure");
+
+	/* Check that the tcache was disabled. */
+	tsd_t *tsd = tsd_fetch();
+	expect_false(tsd_tcache_enabled_get(tsd),
+	    "tcache should be disabled after reinit failure");
+
+	/* Allocations should go to the arena. */
+	void *p = malloc(64);
+	expect_ptr_not_null(p, "malloc should succeed without tcache");
+	free(p);
+
+	/* Restore the original values */
+	tcache_stack_alloc = orig_tcache_stack_alloc;
+	opt_abort = orig_opt_abort;
+
+	/*
+	 * Try to enable the tcache again.  This time initialization
+	 * should succeed.
+	 */
+	bool e0, e1;
+	size_t bool_sz = sizeof(bool);
+	e1 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz,
+	    (void *)&e1, bool_sz), 0, "Unexpected mallctl failure");
+
+	/* Restore the original tcache max. 
*/ + expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&old_tcache_max, sizeof(old_tcache_max)), 0, + "Unexpected mallctl failure"); +} +TEST_END + +int +main(void) { + return test(test_tcache_data_init_oom, test_tcache_reinit_oom); +} From ad726adf7539f78bf652db04f215333f1536bf85 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 13:02:46 -0800 Subject: [PATCH 367/395] Separate out the configuration code from initialization --- Makefile.in | 1 + include/jemalloc/internal/conf.h | 27 + .../internal/jemalloc_internal_externs.h | 2 + src/conf.c | 1228 +++++++++++++++++ src/jemalloc.c | 1137 +-------------- 5 files changed, 1261 insertions(+), 1134 deletions(-) create mode 100644 include/jemalloc/internal/conf.h create mode 100644 src/conf.c diff --git a/Makefile.in b/Makefile.in index ec2215b3..1a7207e0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -131,6 +131,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ + $(srcroot)src/conf.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ diff --git a/include/jemalloc/internal/conf.h b/include/jemalloc/internal/conf.h new file mode 100644 index 00000000..21661955 --- /dev/null +++ b/include/jemalloc/internal/conf.h @@ -0,0 +1,27 @@ +#ifndef JEMALLOC_INTERNAL_CONF_H +#define JEMALLOC_INTERNAL_CONF_H + +#include "jemalloc/internal/sc.h" + +void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]); +void malloc_abort_invalid_conf(void); + +#ifdef JEMALLOC_JET +extern bool had_conf_error; +bool conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p); +void conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +bool conf_handle_bool(const char *v, size_t vlen, bool *result); +bool conf_handle_unsigned(const char *v, size_t vlen, + uintmax_t min, uintmax_t max, bool check_min, bool 
check_max, + bool clip, uintmax_t *result); +bool conf_handle_signed(const char *v, size_t vlen, + intmax_t min, intmax_t max, bool check_min, bool check_max, + bool clip, intmax_t *result); +bool conf_handle_char_p(const char *v, size_t vlen, + char *dest, size_t dest_sz); +#endif + +#endif /* JEMALLOC_INTERNAL_CONF_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index ea739ea8..9911c199 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sec_opts.h" @@ -34,6 +35,7 @@ extern bool opt_experimental_infallible_new; extern bool opt_experimental_tcache_gc; extern bool opt_zero; extern unsigned opt_narenas; +extern fxp_t opt_narenas_ratio; extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; extern const char *const zero_realloc_mode_names[]; diff --git a/src/conf.c b/src/conf.c new file mode 100644 index 00000000..8a23bda6 --- /dev/null +++ b/src/conf.c @@ -0,0 +1,1228 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/log.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/util.h" + +#include "jemalloc/internal/conf.h" + +/* Whether 
encountered any invalid config options. */ +bool had_conf_error; + +static char * +jemalloc_getenv(const char *name) { +#ifdef JEMALLOC_FORCE_GETENV + return getenv(name); +#else +# ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +# else +# ifdef JEMALLOC_HAVE_ISSETUGID + if (issetugid() != 0) { + return NULL; + } +# endif + return getenv(name); +# endif +#endif +} + +static void +init_opt_stats_opts(const char *v, size_t vlen, char *dest) { + size_t opts_len = strlen(dest); + assert(opts_len <= stats_print_tot_num_options); + + for (size_t i = 0; i < vlen; i++) { + switch (v[i]) { +#define OPTION(o, v, d, s) \ + case o: \ + break; + STATS_PRINT_OPTIONS +#undef OPTION + default: + continue; + } + + if (strchr(dest, v[i]) != NULL) { + /* Ignore repeated. */ + continue; + } + + dest[opts_len++] = v[i]; + dest[opts_len] = '\0'; + assert(opts_len <= stats_print_tot_num_options); + } + assert(opts_len == strlen(dest)); +} + +static void +malloc_conf_format_error(const char *msg, const char *begin, const char *end) { + size_t len = end - begin + 1; + len = len > BUFERROR_BUF ? 
BUFERROR_BUF : len; + + malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); +} + +JET_EXTERN bool +conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) { + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; !accept;) { + switch (*opts) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_conf_format_error( + "Conf string ends with key", *opts_p, + opts - 1); + had_conf_error = true; + } + return true; + default: + malloc_conf_format_error( + "Malformed conf string", *opts_p, opts); + had_conf_error = true; + return true; + } + } + + for (accept = false; !accept;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. 
+ */ + if (*opts == '\0') { + malloc_conf_format_error( + "Conf string ends with comma", *opts_p, + opts - 1); + had_conf_error = true; + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return false; +} + +void +malloc_abort_invalid_conf(void) { + assert(opt_abort_conf); + malloc_printf( + ": Abort (abort_conf:true) on invalid conf " + "value (see above).\n"); + invalid_conf_abort(); +} + +JET_EXTERN void +conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { + malloc_printf( + ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + /* If abort_conf is set, error out after processing all options. */ + const char *experimental = "experimental_"; + if (strncmp(k, experimental, strlen(experimental)) == 0) { + /* However, tolerate experimental features. */ + return; + } + const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; + const size_t deprecated_cnt = (sizeof(deprecated) + / sizeof(deprecated[0])); + for (size_t i = 0; i < deprecated_cnt; ++i) { + if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { + /* Tolerate deprecated features. 
*/ + return; + } + } + had_conf_error = true; +} + +JET_EXTERN bool +conf_handle_bool(const char *v, size_t vlen, bool *result) { + if (sizeof("true") - 1 == vlen && strncmp("true", v, vlen) == 0) { + *result = true; + } else if (sizeof("false") - 1 == vlen + && strncmp("false", v, vlen) == 0) { + *result = false; + } else { + return true; + } + return false; +} + +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function") + +JET_EXTERN bool +conf_handle_unsigned(const char *v, size_t vlen, + uintmax_t min, uintmax_t max, bool check_min, bool check_max, + bool clip, uintmax_t *result) { + char *end; + set_errno(0); + uintmax_t mv = (uintmax_t)malloc_strtoumax(v, &end, 0); + if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { + return true; + } + if (clip) { + if (check_min && mv < min) { + *result = min; + } else if (check_max && mv > max) { + *result = max; + } else { + *result = mv; + } + } else { + if ((check_min && mv < min) || (check_max && mv > max)) { + return true; + } + *result = mv; + } + return false; +} + +JET_EXTERN bool +conf_handle_signed(const char *v, size_t vlen, + intmax_t min, intmax_t max, bool check_min, bool check_max, + bool clip, intmax_t *result) { + char *end; + set_errno(0); + intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0); + if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { + return true; + } + if (clip) { + if (check_min && mv < min) { + *result = min; + } else if (check_max && mv > max) { + *result = max; + } else { + *result = mv; + } + } else { + if ((check_min && mv < min) || (check_max && mv > max)) { + return true; + } + *result = mv; + } + return false; +} + +JET_EXTERN bool +conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz) { + size_t cpylen = (vlen <= dest_sz - 1) ? 
vlen : dest_sz - 1; + strncpy(dest, v, cpylen); + dest[cpylen] = '\0'; + return false; +} + +JEMALLOC_DIAGNOSTIC_POP + +/* Number of sources for initializing malloc_conf */ +#define MALLOC_CONF_NSOURCES 5 + +static const char * +obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { + if (config_debug) { + static unsigned read_source = 0; + /* + * Each source should only be read once, to minimize # of + * syscalls on init. + */ + assert(read_source == which_source); + read_source++; + } + assert(which_source < MALLOC_CONF_NSOURCES); + + const char *ret; + switch (which_source) { + case 0: + ret = config_malloc_conf; + break; + case 1: + if (je_malloc_conf != NULL) { + /* Use options that were compiled into the program. */ + ret = je_malloc_conf; + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + case 2: { +#ifndef JEMALLOC_CONFIG_FILE + ret = NULL; + break; +#else + ssize_t linklen = 0; +# ifndef _WIN32 + int saved_errno = errno; + const char *linkname = +# ifdef JEMALLOC_PREFIX + "/etc/" JEMALLOC_PREFIX "malloc.conf" +# else + "/etc/malloc.conf" +# endif + ; + + /* + * Try to use the contents of the "/etc/malloc.conf" symbolic + * link's name. + */ +# ifndef JEMALLOC_READLINKAT + linklen = readlink(linkname, readlink_buf, PATH_MAX); +# else + linklen = readlinkat( + AT_FDCWD, linkname, readlink_buf, PATH_MAX); +# endif + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* Restore errno. */ + set_errno(saved_errno); + } +# endif + readlink_buf[linklen] = '\0'; + ret = readlink_buf; + break; +#endif + } + case 3: { +#ifndef JEMALLOC_CONFIG_ENV + ret = NULL; + break; +#else + const char *envname = +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX "MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; + + if ((ret = jemalloc_getenv(envname)) != NULL) { + opt_malloc_conf_env_var = ret; + } else { + /* No configuration specified. 
*/ + ret = NULL; + } + break; +#endif + } + case 4: { + ret = je_malloc_conf_2_conf_harder; + break; + } + default: + not_reached(); + ret = NULL; + } + return ret; +} + +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa) { + return; + } + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + had_conf_error = true; + malloc_printf( + ": huge page size (%zu) greater than expected." + "May not be supported or behave as expected.", + HUGEPAGE); + } +#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE + if (opt_hpa_opts.hugify_sync) { + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); + } +#endif +} + +static void +malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], + char readlink_buf[PATH_MAX + 1]) { + static const char *opts_explain[MALLOC_CONF_NSOURCES] = { + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", + }; + unsigned i; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { + /* Get runtime configuration. */ + if (initial_call) { + opts_cache[i] = obtain_malloc_conf(i, readlink_buf); + } + opts = opts_cache[i]; + if (!initial_call && opt_confirm_conf) { + malloc_printf( + ": malloc_conf #%u (%s): \"%s\"\n", i + 1, + opts_explain[i], opts != NULL ? 
opts : ""); + } + if (opts == NULL) { + continue; + } + + while (*opts != '\0' + && !conf_next(&opts, &k, &klen, &v, &vlen)) { +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + conf_error(msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE \ + { \ + if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ + malloc_printf( \ + ": -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", \ + (int)klen, k, (int)vlen, v); \ + } \ + continue; \ + } +#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) +#define CONF_HANDLE_BOOL(o, n) \ + if (CONF_MATCH(n)) { \ + if (conf_handle_bool(v, vlen, &o)) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } \ + CONF_CONTINUE; \ + } + /* + * One of the CONF_MIN macros below expands, in one of the use points, + * to "unsigned integer < 0", which is always false, triggering the + * GCC -Wtype-limits warning, which we disable here and re-enable below. 
+ */ + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) + +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) + +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ + if (CONF_MATCH(n)) { \ + max_t mv; \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } else if (clip) { \ + if (check_min(mv, (t)(min))) { \ + o = (t)(min); \ + } else if (check_max(mv, (t)(max))) { \ + o = (t)(max); \ + } else { \ + o = (t)mv; \ + } \ + } else { \ + if (check_min(mv, (t)(min)) \ + || check_max(mv, (t)(max))) { \ + CONF_ERROR( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else { \ + o = (t)mv; \ + } \ + } \ + CONF_CONTINUE; \ + } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) + +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_SIGNED( \ + int64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, 
check_max, clip) +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + CONF_HANDLE_T_SIGNED( \ + ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (CONF_MATCH(n)) { \ + size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ + : sizeof(o) - 1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + CONF_CONTINUE; \ + } + + bool cur_opt_valid = true; + + CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") + if (initial_call) { + continue; + } + + CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") + CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL( + opt_experimental_hpa_start_huge_if_thp_always, + "experimental_hpa_start_huge_if_thp_always") + CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, + "experimental_hpa_enforce_hugify") + CONF_HANDLE_BOOL( + opt_huge_arena_pac_thp, "huge_arena_pac_thp") + if (strncmp("metadata_thp", k, klen) == 0) { + int m; + bool match = false; + for (m = 0; m < metadata_thp_mode_limit; m++) { + if (strncmp(metadata_thp_mode_names[m], + v, vlen) + == 0) { + opt_metadata_thp = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL(opt_retain, "retain") + if (strncmp("dss", k, klen) == 0) { + int m; + bool match = false; + for (m = 0; m < dss_prec_limit; m++) { + if (strncmp(dss_prec_names[m], v, vlen) + == 0) { + if (extent_dss_prec_set(m)) { + CONF_ERROR( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[m]; + match = true; + break; + } + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("narenas")) { + if (CONF_MATCH_VALUE("default")) { + opt_narenas = 0; + CONF_CONTINUE; + } else { + CONF_HANDLE_UNSIGNED(opt_narenas, + "narenas", 1, UINT_MAX, + CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, + /* clip */ false) + } + } + if (CONF_MATCH("narenas_ratio")) { + char *end; + bool err = fxp_parse( + &opt_narenas_ratio, v, &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("bin_shards")) { + const char *bin_shards_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t size_start; + size_t size_end; + size_t nshards; + bool err = multi_setting_parse_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err + || bin_update_shard_size( + bin_shard_sizes, size_start, + size_end, nshards)) { + CONF_ERROR( + "Invalid settings for " + "bin_shards", + k, klen, v, vlen); + break; + } + } while (vlen_left > 0); + CONF_CONTINUE; + } + if (CONF_MATCH("tcache_ncached_max")) { + bool err = tcache_bin_info_default_init( + v, vlen); + if (err) { + CONF_ERROR( + "Invalid settings for " + "tcache_ncached_max", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_mutex_max_spin, + "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, + "dirty_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); + CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, + "muzzy_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? 
NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); + CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, + "process_madvise_max_batch", 0, + PROCESS_MADVISE_MAX_BATCH_LIMIT, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + if (CONF_MATCH("stats_print_opts")) { + init_opt_stats_opts( + v, vlen, opt_stats_print_opts); + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_stats_interval, + "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false) + if (CONF_MATCH("stats_interval_opts")) { + init_opt_stats_opts( + v, vlen, opt_stats_interval_opts); + CONF_CONTINUE; + } + if (config_fill) { + if (CONF_MATCH("junk")) { + if (CONF_MATCH_VALUE("true")) { + opt_junk = "true"; + opt_junk_alloc = opt_junk_free = + true; + } else if (CONF_MATCH_VALUE("false")) { + opt_junk = "false"; + opt_junk_alloc = opt_junk_free = + false; + } else if (CONF_MATCH_VALUE("alloc")) { + opt_junk = "alloc"; + opt_junk_alloc = true; + opt_junk_free = false; + } else if (CONF_MATCH_VALUE("free")) { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL(opt_zero, "zero") + } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, "utrace") + } + if (config_xmalloc) { + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") + } + if (config_enable_cxx) { + CONF_HANDLE_BOOL( + opt_experimental_infallible_new, + "experimental_infallible_new") + } + + CONF_HANDLE_BOOL(opt_experimental_tcache_gc, + "experimental_tcache_gc") + CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, + TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + if (CONF_MATCH("lg_tcache_max")) { + size_t m; + CONF_VALUE_READ(size_t, m) + if (CONF_VALUE_READ_FAIL()) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + /* clip if necessary */ + if (m > 
TCACHE_LG_MAXCLASS_LIMIT) { + m = TCACHE_LG_MAXCLASS_LIMIT; + } + opt_tcache_max = (size_t)1 << m; + } + CONF_CONTINUE; + } + /* + * Anyone trying to set a value outside -16 to 16 is + * deeply confused. + */ + CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, + "lg_tcache_nslots_mul", -16, 16) + /* Ditto with values past 2048. */ + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, + "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, + "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, + "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, + "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, + "tcache_gc_delay_bytes", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, + "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, + "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, + "debug_double_free_max_scan", 0, UINT_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, + "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ false) + + /* + * The runtime option of oversize_threshold remains + * undocumented. It may be tweaked in the next major + * release (6.0). The default value 8M is rather + * conservative / safe. Tuning it further down may + * improve fragmentation a bit more, but may also cause + * contention on the huge arena. 
+ */ + CONF_HANDLE_SIZE_T(opt_oversize_threshold, + "oversize_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, false) + + if (strncmp("percpu_arena", k, klen) == 0) { + bool match = false; + for (int m = percpu_arena_mode_names_base; + m < percpu_arena_mode_names_limit; m++) { + if (strncmp(percpu_arena_mode_names[m], + v, vlen) + == 0) { + if (!have_percpu_arena) { + CONF_ERROR( + "No getcpu support", + k, klen, v, vlen); + } + opt_percpu_arena = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL( + opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + opt_max_background_threads, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); + CONF_HANDLE_BOOL(opt_hpa, "hpa") + CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, + "hpa_slab_max_alloc", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + + /* + * Accept either a ratio-based or an exact hugification + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, + "hpa_hugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_hugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.hugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, + "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); + + CONF_HANDLE_BOOL( + opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, + "hpa_min_purge_interval_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + CONF_HANDLE_SSIZE_T( + opt_hpa_opts.experimental_max_purge_nhp, + "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + + /* + * Accept either a ratio-based or an exact purge + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, + "hpa_purge_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_purge_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.purge_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, + "hpa_min_purge_delay_ms", 0, UINT64_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + if (strncmp("hpa_hugify_style", k, klen) == 0) { + bool match = false; + for (int m = 0; m < hpa_hugify_style_limit; + m++) { + if (strncmp(hpa_hugify_style_names[m], + v, vlen) + == 0) { + opt_hpa_opts.hugify_style = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + + if (CONF_MATCH("hpa_dirty_mult")) { + if (CONF_MATCH_VALUE("-1")) { + opt_hpa_opts.dirty_mult = (fxp_t)-1; + CONF_CONTINUE; + } + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.dirty_mult = ratio; + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, + "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, + "hpa_sec_max_alloc", PAGE, + USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, + "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, + "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + + if (CONF_MATCH("slab_sizes")) { + if (CONF_MATCH_VALUE("default")) { 
+ sc_data_init(sc_data); + CONF_CONTINUE; + } + bool err; + const char *slab_size_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t slab_start; + size_t slab_end; + size_t pgs; + err = multi_setting_parse_next( + &slab_size_segment_cur, &vlen_left, + &slab_start, &slab_end, &pgs); + if (!err) { + sc_data_update_slab_size( + sc_data, slab_start, + slab_end, (int)pgs); + } else { + CONF_ERROR( + "Invalid settings " + "for slab_sizes", + k, klen, v, vlen); + } + } while (!err && vlen_left > 0); + CONF_CONTINUE; + } + if (config_prof) { + CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_CHAR_P( + opt_prof_prefix, "prof_prefix", "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") + CONF_HANDLE_BOOL(opt_prof_thread_active_init, + "prof_thread_active_init") + CONF_HANDLE_SIZE_T(opt_lg_prof_sample, + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, + "prof_bt_max", 1, PROF_BT_MAX_LIMIT, + CONF_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + "lg_prof_interval", -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL( + opt_prof_leak_error, "prof_leak_error") + CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, + "prof_pid_namespace") + CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, + "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") + CONF_HANDLE_BOOL(opt_prof_sys_thread_name, + "prof_sys_thread_name") + if (CONF_MATCH("prof_time_resolution")) { + if (CONF_MATCH_VALUE("default")) { + opt_prof_time_res = + prof_time_res_default; + } else if (CONF_MATCH_VALUE("high")) { + if (!config_high_res_timer) { + CONF_ERROR( + "No high resolution" + " timer support", + k, 
klen, v, vlen); + } else { + opt_prof_time_res = + prof_time_res_high; + } + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + /* + * Undocumented. When set to false, don't + * correct for an unbiasing bug in jeprof + * attribution. This can be handy if you want + * to get consistent numbers from your binary + * across different jemalloc versions, even if + * those numbers are incorrect. The default is + * true. + */ + CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") + } + if (config_log) { + if (CONF_MATCH("log")) { + size_t cpylen = (vlen + <= sizeof(log_var_names) + ? vlen + : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + CONF_CONTINUE; + } + } + if (CONF_MATCH("thp")) { + bool match = false; + for (int m = 0; m < thp_mode_names_limit; m++) { + if (strncmp(thp_mode_names[m], v, vlen) + == 0) { + if (!have_madvise_huge + && !have_memcntl) { + CONF_ERROR( + "No THP support", k, + klen, v, vlen); + } + opt_thp = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("zero_realloc")) { + if (CONF_MATCH_VALUE("alloc")) { + opt_zero_realloc_action = + zero_realloc_action_alloc; + } else if (CONF_MATCH_VALUE("free")) { + opt_zero_realloc_action = + zero_realloc_action_free; + } else if (CONF_MATCH_VALUE("abort")) { + opt_zero_realloc_action = + zero_realloc_action_abort; + } else { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (config_uaf_detection + && CONF_MATCH("lg_san_uaf_align")) { + ssize_t a; + CONF_VALUE_READ(ssize_t, a) + if (CONF_VALUE_READ_FAIL() || a < -1) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + if (a == -1) { + opt_lg_san_uaf_align = -1; + CONF_CONTINUE; + } + + /* clip if necessary */ + ssize_t max_allowed = (sizeof(size_t) << 3) - 1; + ssize_t min_allowed = LG_PAGE; + if (a > max_allowed) { + a = 
max_allowed; + } else if (a < min_allowed) { + a = min_allowed; + } + + opt_lg_san_uaf_align = a; + CONF_CONTINUE; + } + + CONF_HANDLE_SIZE_T(opt_san_guard_small, + "san_guard_small", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_san_guard_large, + "san_guard_large", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + + /* + * Disable large size classes is now the default + * behavior in jemalloc. Although it is configurable + * in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. + */ + CONF_HANDLE_BOOL(opt_disable_large_size_classes, + "disable_large_size_classes"); + + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); +#undef CONF_ERROR +#undef CONF_CONTINUE +#undef CONF_MATCH +#undef CONF_MATCH_VALUE +#undef CONF_HANDLE_BOOL +#undef CONF_DONT_CHECK_MIN +#undef CONF_CHECK_MIN +#undef CONF_DONT_CHECK_MAX +#undef CONF_CHECK_MAX +#undef CONF_HANDLE_T +#undef CONF_HANDLE_T_U +#undef CONF_HANDLE_T_SIGNED +#undef CONF_HANDLE_UNSIGNED +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP + } + validate_hpa_settings(); + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); + } + } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); +} + +static bool +malloc_conf_init_check_deps(void) { + if (opt_prof_leak_error && !opt_prof_final) { + malloc_printf( + ": prof_leak_error is set w/o " + "prof_final.\n"); + return true; + } + /* To emphasize in the stats output that opt is disabled when !debug. 
*/ + if (!config_debug) { + opt_debug_double_free_max_scan = 0; + } + + return false; +} + +void +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]) { + const char *opts_cache[MALLOC_CONF_NSOURCES] = { + NULL, NULL, NULL, NULL, NULL}; + + /* The first call only set the confirm_conf option and opts_cache */ + malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); + malloc_conf_init_helper( + sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (malloc_conf_init_check_deps()) { + /* check_deps does warning msg only; abort below if needed. */ + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + } +} + +#undef MALLOC_CONF_NSOURCES diff --git a/src/jemalloc.c b/src/jemalloc.c index d82788eb..8d341ba3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -25,6 +25,8 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/conf.h" + /******************************************************************************/ /* Data. */ @@ -165,7 +167,7 @@ bool opt_experimental_infallible_new = false; bool opt_experimental_tcache_gc = true; bool opt_zero = false; unsigned opt_narenas = 0; -static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; @@ -291,8 +293,6 @@ typedef struct { # define UTRACE(a, b, c) #endif -/* Whether encountered any invalid config options. */ -static bool had_conf_error = false; /******************************************************************************/ /* @@ -733,24 +733,6 @@ check_entry_exit_locking(tsdn_t *tsdn) { * Begin initialization functions. 
*/ -static char * -jemalloc_getenv(const char *name) { -#ifdef JEMALLOC_FORCE_GETENV - return getenv(name); -#else -# ifdef JEMALLOC_HAVE_SECURE_GETENV - return secure_getenv(name); -# else -# ifdef JEMALLOC_HAVE_ISSETUGID - if (issetugid() != 0) { - return NULL; - } -# endif - return getenv(name); -# endif -#endif -} - static unsigned malloc_ncpus(void) { long result; @@ -826,205 +808,6 @@ malloc_cpu_count_is_deterministic(void) { #endif } -static void -init_opt_stats_opts(const char *v, size_t vlen, char *dest) { - size_t opts_len = strlen(dest); - assert(opts_len <= stats_print_tot_num_options); - - for (size_t i = 0; i < vlen; i++) { - switch (v[i]) { -#define OPTION(o, v, d, s) \ - case o: \ - break; - STATS_PRINT_OPTIONS -#undef OPTION - default: - continue; - } - - if (strchr(dest, v[i]) != NULL) { - /* Ignore repeated. */ - continue; - } - - dest[opts_len++] = v[i]; - dest[opts_len] = '\0'; - assert(opts_len <= stats_print_tot_num_options); - } - assert(opts_len == strlen(dest)); -} - -static void -malloc_conf_format_error(const char *msg, const char *begin, const char *end) { - size_t len = end - begin + 1; - len = len > BUFERROR_BUF ? 
BUFERROR_BUF : len; - - malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); -} - -static bool -malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, - char const **v_p, size_t *vlen_p) { - bool accept; - const char *opts = *opts_p; - - *k_p = opts; - - for (accept = false; !accept;) { - switch (*opts) { - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': - case 'r': - case 's': - case 't': - case 'u': - case 'v': - case 'w': - case 'x': - case 'y': - case 'z': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_conf_format_error( - "Conf string ends with key", *opts_p, - opts - 1); - had_conf_error = true; - } - return true; - default: - malloc_conf_format_error( - "Malformed conf string", *opts_p, opts); - had_conf_error = true; - return true; - } - } - - for (accept = false; !accept;) { - switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the next time - * this function is called, it will assume that end of - * input has been cleanly reached if no input remains, - * but we have optimistically already consumed the - * comma if one exists. 
- */ - if (*opts == '\0') { - malloc_conf_format_error( - "Conf string ends with comma", *opts_p, - opts - 1); - had_conf_error = true; - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; - } - } - - *opts_p = opts; - return false; -} - -static void -malloc_abort_invalid_conf(void) { - assert(opt_abort_conf); - malloc_printf( - ": Abort (abort_conf:true) on invalid conf " - "value (see above).\n"); - invalid_conf_abort(); -} - -static void -malloc_conf_error( - const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { - malloc_printf( - ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); - /* If abort_conf is set, error out after processing all options. */ - const char *experimental = "experimental_"; - if (strncmp(k, experimental, strlen(experimental)) == 0) { - /* However, tolerate experimental features. */ - return; - } - const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; - const size_t deprecated_cnt = (sizeof(deprecated) - / sizeof(deprecated[0])); - for (size_t i = 0; i < deprecated_cnt; ++i) { - if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { - /* Tolerate deprecated features. */ - return; - } - } - had_conf_error = true; -} - static void malloc_slow_flag_init(void) { /* @@ -1040,920 +823,6 @@ malloc_slow_flag_init(void) { malloc_slow = (malloc_slow_flags != 0); } -/* Number of sources for initializing malloc_conf */ -#define MALLOC_CONF_NSOURCES 5 - -static const char * -obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { - if (config_debug) { - static unsigned read_source = 0; - /* - * Each source should only be read once, to minimize # of - * syscalls on init. 
- */ - assert(read_source == which_source); - read_source++; - } - assert(which_source < MALLOC_CONF_NSOURCES); - - const char *ret; - switch (which_source) { - case 0: - ret = config_malloc_conf; - break; - case 1: - if (je_malloc_conf != NULL) { - /* Use options that were compiled into the program. */ - ret = je_malloc_conf; - } else { - /* No configuration specified. */ - ret = NULL; - } - break; - case 2: { -#ifndef JEMALLOC_CONFIG_FILE - ret = NULL; - break; -#else - ssize_t linklen = 0; -# ifndef _WIN32 - int saved_errno = errno; - const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/" JEMALLOC_PREFIX "malloc.conf" -# else - "/etc/malloc.conf" -# endif - ; - - /* - * Try to use the contents of the "/etc/malloc.conf" symbolic - * link's name. - */ -# ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, readlink_buf, PATH_MAX); -# else - linklen = readlinkat( - AT_FDCWD, linkname, readlink_buf, PATH_MAX); -# endif - if (linklen == -1) { - /* No configuration specified. */ - linklen = 0; - /* Restore errno. */ - set_errno(saved_errno); - } -# endif - readlink_buf[linklen] = '\0'; - ret = readlink_buf; - break; -#endif - } - case 3: { -#ifndef JEMALLOC_CONFIG_ENV - ret = NULL; - break; -#else - const char *envname = -# ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX "MALLOC_CONF" -# else - "MALLOC_CONF" -# endif - ; - - if ((ret = jemalloc_getenv(envname)) != NULL) { - opt_malloc_conf_env_var = ret; - } else { - /* No configuration specified. */ - ret = NULL; - } - break; -#endif - } - case 4: { - ret = je_malloc_conf_2_conf_harder; - break; - } - default: - not_reached(); - ret = NULL; - } - return ret; -} - -static void -validate_hpa_settings(void) { - if (!hpa_supported() || !opt_hpa) { - return; - } - if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { - had_conf_error = true; - malloc_printf( - ": huge page size (%zu) greater than expected." 
- "May not be supported or behave as expected.", - HUGEPAGE); - } -#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE - if (opt_hpa_opts.hugify_sync) { - had_conf_error = true; - malloc_printf( - ": hpa_hugify_sync config option is enabled, " - "but MADV_COLLAPSE support was not detected at build " - "time."); - } -#endif -} - -static void -malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], - char readlink_buf[PATH_MAX + 1]) { - static const char *opts_explain[MALLOC_CONF_NSOURCES] = { - "string specified via --with-malloc-conf", - "string pointed to by the global variable malloc_conf", - "\"name\" of the file referenced by the symbolic link named " - "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF", - "string pointed to by the global variable " - "malloc_conf_2_conf_harder", - }; - unsigned i; - const char *opts, *k, *v; - size_t klen, vlen; - - for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { - /* Get runtime configuration. */ - if (initial_call) { - opts_cache[i] = obtain_malloc_conf(i, readlink_buf); - } - opts = opts_cache[i]; - if (!initial_call && opt_confirm_conf) { - malloc_printf( - ": malloc_conf #%u (%s): \"%s\"\n", i + 1, - opts_explain[i], opts != NULL ? 
opts : ""); - } - if (opts == NULL) { - continue; - } - - while (*opts != '\0' - && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { -#define CONF_ERROR(msg, k, klen, v, vlen) \ - if (!initial_call) { \ - malloc_conf_error(msg, k, klen, v, vlen); \ - cur_opt_valid = false; \ - } -#define CONF_CONTINUE \ - { \ - if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ - malloc_printf( \ - ": -- " \ - "Set conf value: %.*s:%.*s" \ - "\n", \ - (int)klen, k, (int)vlen, v); \ - } \ - continue; \ - } -#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) -#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) -#define CONF_HANDLE_BOOL(o, n) \ - if (CONF_MATCH(n)) { \ - if (CONF_MATCH_VALUE("true")) { \ - o = true; \ - } else if (CONF_MATCH_VALUE("false")) { \ - o = false; \ - } else { \ - CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ - } \ - CONF_CONTINUE; \ - } - /* - * One of the CONF_MIN macros below expands, in one of the use points, - * to "unsigned integer < 0", which is always false, triggering the - * GCC -Wtype-limits warning, which we disable here and re-enable below. 
- */ - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS - -#define CONF_DONT_CHECK_MIN(um, min) false -#define CONF_CHECK_MIN(um, min) ((um) < (min)) -#define CONF_DONT_CHECK_MAX(um, max) false -#define CONF_CHECK_MAX(um, max) ((um) > (max)) - -#define CONF_VALUE_READ(max_t, result) \ - char *end; \ - set_errno(0); \ - result = (max_t)malloc_strtoumax(v, &end, 0); -#define CONF_VALUE_READ_FAIL() \ - (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) - -#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ - if (CONF_MATCH(n)) { \ - max_t mv; \ - CONF_VALUE_READ(max_t, mv) \ - if (CONF_VALUE_READ_FAIL()) { \ - CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ - } else if (clip) { \ - if (check_min(mv, (t)(min))) { \ - o = (t)(min); \ - } else if (check_max(mv, (t)(max))) { \ - o = (t)(max); \ - } else { \ - o = (t)mv; \ - } \ - } else { \ - if (check_min(mv, (t)(min)) \ - || check_max(mv, (t)(max))) { \ - CONF_ERROR( \ - "Out-of-range " \ - "conf value", \ - k, klen, v, vlen); \ - } else { \ - o = (t)mv; \ - } \ - } \ - CONF_CONTINUE; \ - } -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) - -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_SIGNED( \ - int64_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, 
check_max, clip) -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - CONF_HANDLE_T_SIGNED( \ - ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) -#define CONF_HANDLE_CHAR_P(o, n, d) \ - if (CONF_MATCH(n)) { \ - size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ - : sizeof(o) - 1; \ - strncpy(o, v, cpylen); \ - o[cpylen] = '\0'; \ - CONF_CONTINUE; \ - } - - bool cur_opt_valid = true; - - CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") - if (initial_call) { - continue; - } - - CONF_HANDLE_BOOL(opt_abort, "abort") - CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") - CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") - CONF_HANDLE_BOOL( - opt_experimental_hpa_start_huge_if_thp_always, - "experimental_hpa_start_huge_if_thp_always") - CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, - "experimental_hpa_enforce_hugify") - CONF_HANDLE_BOOL( - opt_huge_arena_pac_thp, "huge_arena_pac_thp") - if (strncmp("metadata_thp", k, klen) == 0) { - int m; - bool match = false; - for (m = 0; m < metadata_thp_mode_limit; m++) { - if (strncmp(metadata_thp_mode_names[m], - v, vlen) - == 0) { - opt_metadata_thp = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL(opt_retain, "retain") - if (strncmp("dss", k, klen) == 0) { - int m; - bool match = false; - for (m = 0; m < dss_prec_limit; m++) { - if (strncmp(dss_prec_names[m], v, vlen) - == 0) { - if (extent_dss_prec_set(m)) { - CONF_ERROR( - "Error setting dss", - k, klen, v, vlen); - } else { - opt_dss = - dss_prec_names[m]; - match = true; - break; - } - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("narenas")) { - if (CONF_MATCH_VALUE("default")) { - opt_narenas = 0; - CONF_CONTINUE; - } else { - CONF_HANDLE_UNSIGNED(opt_narenas, - "narenas", 1, UINT_MAX, - CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, - /* clip */ false) - } - } - if (CONF_MATCH("narenas_ratio")) { - char *end; - bool err = fxp_parse( - &opt_narenas_ratio, v, &end); - if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("bin_shards")) { - const char *bin_shards_segment_cur = v; - size_t vlen_left = vlen; - do { - size_t size_start; - size_t size_end; - size_t nshards; - bool err = multi_setting_parse_next( - &bin_shards_segment_cur, &vlen_left, - &size_start, &size_end, &nshards); - if (err - || bin_update_shard_size( - bin_shard_sizes, size_start, - size_end, nshards)) { - CONF_ERROR( - "Invalid settings for " - "bin_shards", - k, klen, v, vlen); - break; - } - } while (vlen_left > 0); - CONF_CONTINUE; - } - if (CONF_MATCH("tcache_ncached_max")) { - bool err = tcache_bin_info_default_init( - v, vlen); - if (err) { - CONF_ERROR( - "Invalid settings for " - "tcache_ncached_max", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_INT64_T(opt_mutex_max_spin, - "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, false); - CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, - "dirty_decay_ms", -1, - NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) - ? NSTIME_SEC_MAX * KQU(1000) - : SSIZE_MAX); - CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, - "muzzy_decay_ms", -1, - NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) - ? 
NSTIME_SEC_MAX * KQU(1000) - : SSIZE_MAX); - CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, - "process_madvise_max_batch", 0, - PROCESS_MADVISE_MAX_BATCH_LIMIT, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_BOOL(opt_stats_print, "stats_print") - if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_opts( - v, vlen, opt_stats_print_opts); - CONF_CONTINUE; - } - CONF_HANDLE_INT64_T(opt_stats_interval, - "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, false) - if (CONF_MATCH("stats_interval_opts")) { - init_opt_stats_opts( - v, vlen, opt_stats_interval_opts); - CONF_CONTINUE; - } - if (config_fill) { - if (CONF_MATCH("junk")) { - if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; - } else if (CONF_MATCH_VALUE("false")) { - opt_junk = "false"; - opt_junk_alloc = opt_junk_free = - false; - } else if (CONF_MATCH_VALUE("alloc")) { - opt_junk = "alloc"; - opt_junk_alloc = true; - opt_junk_free = false; - } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; - } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL(opt_zero, "zero") - } - if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace") - } - if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") - } - if (config_enable_cxx) { - CONF_HANDLE_BOOL( - opt_experimental_infallible_new, - "experimental_infallible_new") - } - - CONF_HANDLE_BOOL(opt_experimental_tcache_gc, - "experimental_tcache_gc") - CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, - TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - if (CONF_MATCH("lg_tcache_max")) { - size_t m; - CONF_VALUE_READ(size_t, m) - if (CONF_VALUE_READ_FAIL()) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - /* clip if necessary */ - if (m > 
TCACHE_LG_MAXCLASS_LIMIT) { - m = TCACHE_LG_MAXCLASS_LIMIT; - } - opt_tcache_max = (size_t)1 << m; - } - CONF_CONTINUE; - } - /* - * Anyone trying to set a value outside -16 to 16 is - * deeply confused. - */ - CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, - "lg_tcache_nslots_mul", -16, 16) - /* Ditto with values past 2048. */ - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, - "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, - "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, - "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, - "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, - "tcache_gc_delay_bytes", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) - CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, - "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, - "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, - "debug_double_free_max_scan", 0, UINT_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) - CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, - "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ false) - - /* - * The runtime option of oversize_threshold remains - * undocumented. It may be tweaked in the next major - * release (6.0). The default value 8M is rather - * conservative / safe. Tuning it further down may - * improve fragmentation a bit more, but may also cause - * contention on the huge arena. 
- */ - CONF_HANDLE_SIZE_T(opt_oversize_threshold, - "oversize_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) - CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, - "lg_extent_max_active_fit", 0, - (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, false) - - if (strncmp("percpu_arena", k, klen) == 0) { - bool match = false; - for (int m = percpu_arena_mode_names_base; - m < percpu_arena_mode_names_limit; m++) { - if (strncmp(percpu_arena_mode_names[m], - v, vlen) - == 0) { - if (!have_percpu_arena) { - CONF_ERROR( - "No getcpu support", - k, klen, v, vlen); - } - opt_percpu_arena = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL( - opt_background_thread, "background_thread"); - CONF_HANDLE_SIZE_T(opt_max_background_threads, - "max_background_threads", 1, - opt_max_background_threads, CONF_CHECK_MIN, - CONF_CHECK_MAX, true); - CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, - "hpa_slab_max_alloc", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - - /* - * Accept either a ratio-based or an exact hugification - * threshold. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, - "hpa_hugification_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_hugification_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.hugification_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } - - CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, - "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, - CONF_DONT_CHECK_MAX, false); - - CONF_HANDLE_BOOL( - opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); - - CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, - "hpa_min_purge_interval_ms", 0, 0, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - - CONF_HANDLE_SSIZE_T( - opt_hpa_opts.experimental_max_purge_nhp, - "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); - - /* - * Accept either a ratio-based or an exact purge - * threshold. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, - "hpa_purge_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_purge_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.purge_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } - - CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, - "hpa_min_purge_delay_ms", 0, UINT64_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - - if (strncmp("hpa_hugify_style", k, klen) == 0) { - bool match = false; - for (int m = 0; m < hpa_hugify_style_limit; - m++) { - if (strncmp(hpa_hugify_style_names[m], - v, vlen) - == 0) { - opt_hpa_opts.hugify_style = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - - if (CONF_MATCH("hpa_dirty_mult")) { - if (CONF_MATCH_VALUE("-1")) { - opt_hpa_opts.dirty_mult = (fxp_t)-1; - CONF_CONTINUE; - } - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.dirty_mult = ratio; - } - CONF_CONTINUE; - } - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, - "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, - USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, - CONF_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, - "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - - if (CONF_MATCH("slab_sizes")) { - if (CONF_MATCH_VALUE("default")) { 
- sc_data_init(sc_data); - CONF_CONTINUE; - } - bool err; - const char *slab_size_segment_cur = v; - size_t vlen_left = vlen; - do { - size_t slab_start; - size_t slab_end; - size_t pgs; - err = multi_setting_parse_next( - &slab_size_segment_cur, &vlen_left, - &slab_start, &slab_end, &pgs); - if (!err) { - sc_data_update_slab_size( - sc_data, slab_start, - slab_end, (int)pgs); - } else { - CONF_ERROR( - "Invalid settings " - "for slab_sizes", - k, klen, v, vlen); - } - } while (!err && vlen_left > 0); - CONF_CONTINUE; - } - if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof") - CONF_HANDLE_CHAR_P( - opt_prof_prefix, "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active") - CONF_HANDLE_BOOL(opt_prof_thread_active_init, - "prof_thread_active_init") - CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") - CONF_HANDLE_UNSIGNED(opt_prof_bt_max, - "prof_bt_max", 1, PROF_BT_MAX_LIMIT, - CONF_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, - "lg_prof_interval", -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") - CONF_HANDLE_BOOL(opt_prof_final, "prof_final") - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - CONF_HANDLE_BOOL( - opt_prof_leak_error, "prof_leak_error") - CONF_HANDLE_BOOL(opt_prof_log, "prof_log") - CONF_HANDLE_BOOL(opt_prof_pid_namespace, - "prof_pid_namespace") - CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, - "prof_recent_alloc_max", -1, SSIZE_MAX) - CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") - CONF_HANDLE_BOOL(opt_prof_sys_thread_name, - "prof_sys_thread_name") - if (CONF_MATCH("prof_time_resolution")) { - if (CONF_MATCH_VALUE("default")) { - opt_prof_time_res = - prof_time_res_default; - } else if (CONF_MATCH_VALUE("high")) { - if (!config_high_res_timer) { - CONF_ERROR( - "No high resolution" - " timer support", - k, 
klen, v, vlen); - } else { - opt_prof_time_res = - prof_time_res_high; - } - } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - /* - * Undocumented. When set to false, don't - * correct for an unbiasing bug in jeprof - * attribution. This can be handy if you want - * to get consistent numbers from your binary - * across different jemalloc versions, even if - * those numbers are incorrect. The default is - * true. - */ - CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") - } - if (config_log) { - if (CONF_MATCH("log")) { - size_t cpylen = (vlen - <= sizeof(log_var_names) - ? vlen - : sizeof(log_var_names) - 1); - strncpy(log_var_names, v, cpylen); - log_var_names[cpylen] = '\0'; - CONF_CONTINUE; - } - } - if (CONF_MATCH("thp")) { - bool match = false; - for (int m = 0; m < thp_mode_names_limit; m++) { - if (strncmp(thp_mode_names[m], v, vlen) - == 0) { - if (!have_madvise_huge - && !have_memcntl) { - CONF_ERROR( - "No THP support", k, - klen, v, vlen); - } - opt_thp = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("zero_realloc")) { - if (CONF_MATCH_VALUE("alloc")) { - opt_zero_realloc_action = - zero_realloc_action_alloc; - } else if (CONF_MATCH_VALUE("free")) { - opt_zero_realloc_action = - zero_realloc_action_free; - } else if (CONF_MATCH_VALUE("abort")) { - opt_zero_realloc_action = - zero_realloc_action_abort; - } else { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (config_uaf_detection - && CONF_MATCH("lg_san_uaf_align")) { - ssize_t a; - CONF_VALUE_READ(ssize_t, a) - if (CONF_VALUE_READ_FAIL() || a < -1) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - if (a == -1) { - opt_lg_san_uaf_align = -1; - CONF_CONTINUE; - } - - /* clip if necessary */ - ssize_t max_allowed = (sizeof(size_t) << 3) - 1; - ssize_t min_allowed = LG_PAGE; - if (a > max_allowed) { - a = 
max_allowed; - } else if (a < min_allowed) { - a = min_allowed; - } - - opt_lg_san_uaf_align = a; - CONF_CONTINUE; - } - - CONF_HANDLE_SIZE_T(opt_san_guard_small, - "san_guard_small", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - CONF_HANDLE_SIZE_T(opt_san_guard_large, - "san_guard_large", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - - /* - * Disable large size classes is now the default - * behavior in jemalloc. Although it is configurable - * in MALLOC_CONF, this is mainly for debugging - * purposes and should not be tuned. - */ - CONF_HANDLE_BOOL(opt_disable_large_size_classes, - "disable_large_size_classes"); - - CONF_ERROR("Invalid conf pair", k, klen, v, vlen); -#undef CONF_ERROR -#undef CONF_CONTINUE -#undef CONF_MATCH -#undef CONF_MATCH_VALUE -#undef CONF_HANDLE_BOOL -#undef CONF_DONT_CHECK_MIN -#undef CONF_CHECK_MIN -#undef CONF_DONT_CHECK_MAX -#undef CONF_CHECK_MAX -#undef CONF_HANDLE_T -#undef CONF_HANDLE_T_U -#undef CONF_HANDLE_T_SIGNED -#undef CONF_HANDLE_UNSIGNED -#undef CONF_HANDLE_SIZE_T -#undef CONF_HANDLE_SSIZE_T -#undef CONF_HANDLE_CHAR_P - /* Re-enable diagnostic "-Wtype-limits" */ - JEMALLOC_DIAGNOSTIC_POP - } - validate_hpa_settings(); - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); - } - } - atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); -} - -static bool -malloc_conf_init_check_deps(void) { - if (opt_prof_leak_error && !opt_prof_final) { - malloc_printf( - ": prof_leak_error is set w/o " - "prof_final.\n"); - return true; - } - /* To emphasize in the stats output that opt is disabled when !debug. 
*/ - if (!config_debug) { - opt_debug_double_free_max_scan = 0; - } - - return false; -} - -static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - char readlink_buf[PATH_MAX + 1]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = { - NULL, NULL, NULL, NULL, NULL}; - - /* The first call only set the confirm_conf option and opts_cache */ - malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); - malloc_conf_init_helper( - sc_data, bin_shard_sizes, false, opts_cache, NULL); - if (malloc_conf_init_check_deps()) { - /* check_deps does warning msg only; abort below if needed. */ - if (opt_abort_conf) { - malloc_abort_invalid_conf(); - } - } -} - -#undef MALLOC_CONF_NSOURCES - static bool malloc_init_hard_needed(void) { if (malloc_initialized() From 86b721921386a7192e010ec28c7b2308373d07b0 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 13:02:59 -0800 Subject: [PATCH 368/395] Add unit tests for conf parsing and its helpers --- Makefile.in | 5 ++ test/unit/conf.c | 113 +++++++++++++++++++++++++++++ test/unit/conf_init_0.c | 22 ++++++ test/unit/conf_init_1.c | 23 ++++++ test/unit/conf_init_confirm.c | 39 ++++++++++ test/unit/conf_parse.c | 130 ++++++++++++++++++++++++++++++++++ test/unit/malloc_conf_2.c | 24 ++++++- 7 files changed, 354 insertions(+), 2 deletions(-) create mode 100644 test/unit/conf.c create mode 100644 test/unit/conf_init_0.c create mode 100644 test/unit/conf_init_1.c create mode 100644 test/unit/conf_init_confirm.c create mode 100644 test/unit/conf_parse.c diff --git a/Makefile.in b/Makefile.in index 1a7207e0..459f98fb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -216,6 +216,11 @@ TESTS_UNIT := \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/conf.c \ + $(srcroot)test/unit/conf_init_0.c \ + $(srcroot)test/unit/conf_init_1.c \ + $(srcroot)test/unit/conf_init_confirm.c \ + $(srcroot)test/unit/conf_parse.c \ 
$(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/test/unit/conf.c b/test/unit/conf.c new file mode 100644 index 00000000..1a1cde7c --- /dev/null +++ b/test/unit/conf.c @@ -0,0 +1,113 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/conf.h" + +TEST_BEGIN(test_conf_next_simple) { + const char *opts = "key:value"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end"); + expect_zu_eq(klen, 3, "Key length should be 3"); + expect_false(strncmp(k, "key", klen), "Key should be \"key\""); + expect_zu_eq(vlen, 5, "Value length should be 5"); + expect_false(strncmp(v, "value", vlen), "Value should be \"value\""); + expect_false(had_conf_error, "Should not have had an error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_multi) { + const char *opts = "k1:v1,k2:v2"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + bool end; + + had_conf_error = false; + + end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end after first pair"); + expect_zu_eq(klen, 2, "First key length should be 2"); + expect_false(strncmp(k, "k1", klen), "First key should be \"k1\""); + expect_zu_eq(vlen, 2, "First value length should be 2"); + expect_false(strncmp(v, "v1", vlen), "First value should be \"v1\""); + + end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end after second pair"); + expect_zu_eq(klen, 2, "Second key length should be 2"); + expect_false(strncmp(k, "k2", klen), "Second key should be \"k2\""); + expect_zu_eq(vlen, 2, "Second value length should be 2"); + expect_false(strncmp(v, "v2", vlen), "Second value should be \"v2\""); + + expect_false(had_conf_error, "Should not have had an error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_empty) { + const char *opts = ""; + const char *k; + size_t klen; + const 
char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Empty string should return true (end)"); + expect_false(had_conf_error, "Empty string should not set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_missing_value) { + const char *opts = "key_only"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Key without value should return true (end)"); + expect_true(had_conf_error, "Key without value should set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_malformed) { + const char *opts = "bad!key:val"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Malformed key should return true (end)"); + expect_true(had_conf_error, "Malformed key should set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_trailing_comma) { + const char *opts = "k:v,"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should parse the first pair successfully"); + expect_true(had_conf_error, + "Trailing comma should set error"); +} +TEST_END + +int +main(void) { + return test(test_conf_next_simple, test_conf_next_multi, + test_conf_next_empty, test_conf_next_missing_value, + test_conf_next_malformed, test_conf_next_trailing_comma); +} diff --git a/test/unit/conf_init_0.c b/test/unit/conf_init_0.c new file mode 100644 index 00000000..a1f0e63f --- /dev/null +++ b/test/unit/conf_init_0.c @@ -0,0 +1,22 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_default_dirty_decay_ms) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); 
+ assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 10000, + "dirty_decay_ms should be the default (10000)" + " when no global variables are set"); +} +TEST_END + +int +main(void) { + return test(test_default_dirty_decay_ms); +} diff --git a/test/unit/conf_init_1.c b/test/unit/conf_init_1.c new file mode 100644 index 00000000..07aec5dc --- /dev/null +++ b/test/unit/conf_init_1.c @@ -0,0 +1,23 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1234"; + +TEST_BEGIN(test_malloc_conf_dirty_decay_ms) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 1234, + "dirty_decay_ms should be 1234 (set via malloc_conf)"); +} +TEST_END + +int +main(void) { + return test(test_malloc_conf_dirty_decay_ms); +} diff --git a/test/unit/conf_init_confirm.c b/test/unit/conf_init_confirm.c new file mode 100644 index 00000000..a4358359 --- /dev/null +++ b/test/unit/conf_init_confirm.c @@ -0,0 +1,39 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1234,confirm_conf:true"; + +TEST_BEGIN(test_confirm_conf_two_pass) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + bool confirm_conf; + size_t sz = sizeof(confirm_conf); + + int err = mallctl("opt.confirm_conf", &confirm_conf, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_true(confirm_conf, + "confirm_conf should be true (processed in pass 1)"); +} +TEST_END + +TEST_BEGIN(test_conf_option_applied_in_second_pass) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + 
expect_zd_eq(dirty_decay_ms, 1234, + "dirty_decay_ms should be 1234 (processed in pass 2)"); +} +TEST_END + +int +main(void) { + return test(test_confirm_conf_two_pass, + test_conf_option_applied_in_second_pass); +} diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c new file mode 100644 index 00000000..b3fedb40 --- /dev/null +++ b/test/unit/conf_parse.c @@ -0,0 +1,130 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/conf.h" + +TEST_BEGIN(test_conf_handle_bool_true) { + bool result = false; + bool err = conf_handle_bool("true", sizeof("true") - 1, &result); + expect_false(err, "conf_handle_bool should succeed for \"true\""); + expect_true(result, "result should be true"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_bool_false) { + bool result = true; + bool err = conf_handle_bool("false", sizeof("false") - 1, &result); + expect_false(err, "conf_handle_bool should succeed for \"false\""); + expect_false(result, "result should be false"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_bool_invalid) { + bool result = false; + bool err = conf_handle_bool("yes", sizeof("yes") - 1, &result); + expect_true(err, "conf_handle_bool should fail for \"yes\""); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_in_range) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("100", sizeof("100") - 1, + 1, 2048, true, true, true, &result); + expect_false(err, "Should succeed for in-range value"); + expect_u64_eq((uint64_t)result, 100, "result should be 100"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_clip_max) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, + 1, 2048, true, true, true, &result); + expect_false(err, "Should succeed with clipping"); + expect_u64_eq((uint64_t)result, 2048, + "result should be clipped to max 2048"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_clip_min) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("0", sizeof("0") - 1, + 1, 2048, true, true, 
true, &result); + expect_false(err, "Should succeed with clipping"); + expect_u64_eq((uint64_t)result, 1, + "result should be clipped to min 1"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, + 1, 2048, true, true, false, &result); + expect_true(err, "Should fail for out-of-range value without clip"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_invalid) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("abc", sizeof("abc") - 1, + 1, 2048, true, true, true, &result); + expect_true(err, "Should fail for non-numeric input"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_valid) { + intmax_t result = 0; + bool err = conf_handle_signed("5000", sizeof("5000") - 1, + -1, INTMAX_MAX, true, false, false, &result); + expect_false(err, "Should succeed for valid value"); + expect_d64_eq((int64_t)result, 5000, "result should be 5000"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_negative) { + intmax_t result = 0; + bool err = conf_handle_signed("-1", sizeof("-1") - 1, + -1, INTMAX_MAX, true, false, false, &result); + expect_false(err, "Should succeed for -1"); + expect_d64_eq((int64_t)result, -1, "result should be -1"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_out_of_range) { + intmax_t result = 0; + bool err = conf_handle_signed("5000", sizeof("5000") - 1, + -1, 4999, true, true, false, &result); + expect_true(err, "Should fail for out-of-range value"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_char_p) { + char buf[8]; + bool err; + + /* Normal copy. */ + err = conf_handle_char_p("hello", sizeof("hello") - 1, buf, sizeof(buf)); + expect_false(err, "Should succeed"); + expect_str_eq(buf, "hello", "Should copy string"); + + /* Truncation. 
*/ + err = conf_handle_char_p("longstring", sizeof("longstring") - 1, + buf, sizeof(buf)); + expect_false(err, "Should succeed even when truncating"); + expect_str_eq(buf, "longstr", "Should truncate to dest_sz - 1"); +} +TEST_END + +int +main(void) { + return test(test_conf_handle_bool_true, + test_conf_handle_bool_false, + test_conf_handle_bool_invalid, + test_conf_handle_unsigned_in_range, + test_conf_handle_unsigned_clip_max, + test_conf_handle_unsigned_clip_min, + test_conf_handle_unsigned_no_clip_reject, + test_conf_handle_unsigned_invalid, + test_conf_handle_signed_valid, + test_conf_handle_signed_negative, + test_conf_handle_signed_out_of_range, + test_conf_handle_char_p); +} diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index 023b7102..667e7006 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "dirty_decay_ms:1000"; +const char *malloc_conf = "dirty_decay_ms:1000,muzzy_decay_ms:2000"; const char *malloc_conf_2_conf_harder = "dirty_decay_ms:1234"; TEST_BEGIN(test_malloc_conf_2) { @@ -49,7 +49,27 @@ TEST_BEGIN(test_mallctl_global_var) { } TEST_END +TEST_BEGIN(test_non_conflicting_var) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker trickery. 
*/ + test_skip_if(windows); + + ssize_t muzzy_decay_ms; + size_t sz = sizeof(muzzy_decay_ms); + + int err = mallctl("opt.muzzy_decay_ms", &muzzy_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(muzzy_decay_ms, 2000, + "Non-conflicting option from malloc_conf should pass through"); +} +TEST_END + int main(void) { - return test(test_malloc_conf_2, test_mallctl_global_var); + return test(test_malloc_conf_2, test_mallctl_global_var, + test_non_conflicting_var); } From 1d018d8fdabec88134b32122aa054cb8b37fe29c Mon Sep 17 00:00:00 2001 From: Tony Printezis Date: Wed, 25 Mar 2026 06:59:01 -0700 Subject: [PATCH 369/395] improve hpdata_assert_consistent() A few ways this consistency check can be improved: * Print which conditions fail and associated values. * Accumulate the result so that we can print all conditions that fail. * Turn hpdata_assert_consistent() into a macro so, when it fails, we can get line number where it's called from. --- include/jemalloc/internal/hpdata.h | 79 ++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index eb83c900..a9c507f0 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -342,39 +342,76 @@ hpdata_assert_empty(hpdata_t *hpdata) { */ static inline bool hpdata_consistent(hpdata_t *hpdata) { - if (fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - != hpdata_longest_free_range_get(hpdata)) { - return false; + bool res = true; + + const size_t active_urange_longest = fb_urange_longest( + hpdata->active_pages, HUGEPAGE_PAGES); + const size_t longest_free_range = hpdata_longest_free_range_get(hpdata); + if (active_urange_longest != longest_free_range) { + malloc_printf( + ": active_fb_urange_longest=%zu != hpdata_longest_free_range=%zu\n", + active_urange_longest, longest_free_range); + res = false; } - if (fb_scount(hpdata->active_pages, 
HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_nactive) { - return false; + + const size_t active_scount = fb_scount( + hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + if (active_scount != hpdata->h_nactive) { + malloc_printf( + ": active_fb_scount=%zu != hpdata_nactive=%zu\n", + active_scount, hpdata->h_nactive); + res = false; } - if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_ntouched) { - return false; + + const size_t touched_scount = fb_scount( + hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + if (touched_scount != hpdata->h_ntouched) { + malloc_printf( + ": touched_fb_scount=%zu != hpdata_ntouched=%zu\n", + touched_scount, hpdata->h_ntouched); + res = false; } + if (hpdata->h_ntouched < hpdata->h_nactive) { - return false; + malloc_printf( + ": hpdata_ntouched=%zu < hpdata_nactive=%zu\n", + hpdata->h_ntouched, hpdata->h_nactive); + res = false; } - if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { - return false; + + if (hpdata->h_huge && (hpdata->h_ntouched != HUGEPAGE_PAGES)) { + malloc_printf( + ": hpdata_huge=%d && (hpdata_ntouched=%zu != hugepage_pages=%zu)\n", + hpdata->h_huge, hpdata->h_ntouched, HUGEPAGE_PAGES); + res = false; } - if (hpdata_changing_state_get(hpdata) - && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) { - return false; + + const bool changing_state = hpdata_changing_state_get(hpdata); + if (changing_state + && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + malloc_printf( + ": hpdata_changing_state=%d && (hpdata_purge_allowed=%d || hpdata_hugify_allowed=%d)\n", + changing_state, hpdata->h_purge_allowed, + hpdata->h_hugify_allowed); + res = false; } + if (hpdata_hugify_allowed_get(hpdata) != hpdata_in_psset_hugify_container_get(hpdata)) { - return false; + malloc_printf( + ": hpdata_hugify_allowed=%d != hpdata_in_psset_hugify_container=%d\n", + hpdata_hugify_allowed_get(hpdata), + hpdata_in_psset_hugify_container_get(hpdata)); + res = 
false; } - return true; + + return res; } -static inline void -hpdata_assert_consistent(hpdata_t *hpdata) { - assert(hpdata_consistent(hpdata)); -} +#define hpdata_assert_consistent(hpdata) \ + do { \ + assert(hpdata_consistent(hpdata)); \ + } while (0) static inline bool hpdata_empty(const hpdata_t *hpdata) { From d758349ca438ee35769409b06c642ca2d8e408ac Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 25 Mar 2026 21:13:34 -0700 Subject: [PATCH 370/395] Fix psset_pick_purge when last candidate with index 0 dirtiness is ineligible psset_pick_purge used max_bit-- after rejecting a time-ineligible candidate, which caused unnecessary re-scanning of the same bitmap (and made the assert fail in debug mode) and a size_t underflow when the lowest-index entry was rejected. Use max_bit = ind - 1 to skip directly past the rejected index. --- src/psset.c | 5 ++++- test/unit/psset.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/psset.c b/src/psset.c index a8a9615d..4e904feb 100644 --- a/src/psset.c +++ b/src/psset.c @@ -417,7 +417,10 @@ psset_pick_purge(psset_t *psset, const nstime_t *now) { if (nstime_compare(tm_allowed, now) <= 0) { return ps; } - max_bit--; + if (ind == 0) { + break; + } + max_bit = ind - 1; } /* No page is ready yet */ return NULL; diff --git a/test/unit/psset.c b/test/unit/psset.c index 12d55941..6ad653f5 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -941,6 +941,58 @@ TEST_BEGIN(test_purge_prefers_empty) { } TEST_END +TEST_BEGIN(test_pick_purge_underflow) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); + void *ptr; + + psset_t psset; + psset_init(&psset); + + /* + * Test that psset_pick_purge skips directly past a time-ineligible + * entry without underflow. + * + * Create a hugified, non-empty hpdata with 1 dirty page, which + * lands at purge list index 0 (pind=0, huge=true). Set its + * purge-allowed time in the future. 
Calling psset_pick_purge + * with a "now" before that time should return NULL without + * looping through all higher indices on the way down. + */ + hpdata_t hpdata_lowest; + nstime_t future_tm, now; + const uint64_t BASE_SEC = 1000; + + hpdata_init(&hpdata_lowest, (void *)(10 * HUGEPAGE), 100, false); + psset_insert(&psset, &hpdata_lowest); + + psset_update_begin(&psset, &hpdata_lowest); + /* Allocate all pages. */ + ptr = hpdata_reserve_alloc(&hpdata_lowest, HUGEPAGE_PAGES * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_lowest), ptr, ""); + /* Hugify the slab. */ + hpdata_hugify(&hpdata_lowest); + /* Free the last page to create exactly 1 dirty page. */ + hpdata_unreserve(&hpdata_lowest, + (void *)((uintptr_t)ptr + (HUGEPAGE_PAGES - 1) * PAGE), PAGE); + /* Now: nactive = HUGEPAGE_PAGES-1, ndirty = 1, huge = true. + * purge_list_ind = sz_psz2ind(sz_psz_quantize_floor(PAGE)) * 2 + 0 + * which should be index 0. */ + hpdata_purge_allowed_set(&hpdata_lowest, true); + nstime_init2(&future_tm, BASE_SEC + 9999, 0); + hpdata_time_purge_allowed_set(&hpdata_lowest, &future_tm); + psset_update_end(&psset, &hpdata_lowest); + + /* + * Call with a "now" before the future time. Should return NULL + * (no eligible entry). 
+ */ + nstime_init2(&now, BASE_SEC + 500, 0); + hpdata_t *to_purge = psset_pick_purge(&psset, &now); + expect_ptr_null( + to_purge, "Should return NULL when no entry is time-eligible"); +} +TEST_END + TEST_BEGIN(test_purge_prefers_empty_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; @@ -1020,5 +1072,6 @@ main(void) { test_multi_pageslab, test_stats_merged, test_stats_huge, test_stats_fullness, test_oldest_fit, test_insert_remove, test_purge_prefers_nonhuge, test_purge_timing, - test_purge_prefers_empty, test_purge_prefers_empty_huge); + test_purge_prefers_empty, test_pick_purge_underflow, + test_purge_prefers_empty_huge); } From a87c518babfe81395a63b6b023245d8359ca1b96 Mon Sep 17 00:00:00 2001 From: Weixie Cui Date: Wed, 18 Mar 2026 11:35:09 +0800 Subject: [PATCH 371/395] Fix typo in prof_log_rep_check: use != instead of || for alloc_count The condition incorrectly used 'alloc_count || 0' which was likely a typo for 'alloc_count != 0'. While both evaluate similarly for the zero/non-zero case, the fix ensures consistency with bt_count and thr_count checks and uses the correct comparison operator. 
--- src/prof_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 64b363bb..74f1372f 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -375,7 +375,7 @@ prof_log_rep_check(void) { size_t alloc_count = prof_log_alloc_count(); if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + if (bt_count != 0 || thr_count != 0 || alloc_count != 0) { return true; } } From 19bbefe136cf8684e126cdb80f7ef2aba88e55dc Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 18:25:50 -0700 Subject: [PATCH 372/395] Remove dead code: extent_commit_wrapper, large_salloc, tcache_gc_dalloc event waits These functions had zero callers anywhere in the codebase: - extent_commit_wrapper: wrapper never called, _impl used directly - large_salloc: trivial wrapper never called - tcache_gc_dalloc_new_event_wait: no header declaration, no callers - tcache_gc_dalloc_postponed_event_wait: no header declaration, no callers --- include/jemalloc/internal/extent.h | 2 -- include/jemalloc/internal/large_externs.h | 1 - src/extent.c | 7 ------- src/large.c | 5 ----- src/tcache.c | 10 ---------- 5 files changed, 25 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index e81dff2c..a9f81cb7 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -57,8 +57,6 @@ void extent_dalloc_wrapper_purged( tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper( tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git a/include/jemalloc/internal/large_externs.h 
b/include/jemalloc/internal/large_externs.h index 7cee6752..84c6c5d6 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -17,7 +17,6 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); -size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); void large_prof_info_get( tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); diff --git a/src/extent.c b/src/extent.c index 0a23bbd9..4b927191 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1239,13 +1239,6 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, return err; } -bool -extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { - return extent_commit_impl(tsdn, ehooks, edata, offset, length, - /* growing_retained */ false); -} - static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { diff --git a/src/large.c b/src/large.c index 087df99d..56fa16ab 100644 --- a/src/large.c +++ b/src/large.c @@ -276,11 +276,6 @@ large_dalloc(tsdn_t *tsdn, edata_t *edata) { arena_decay_tick(tsdn, arena); } -size_t -large_salloc(tsdn_t *tsdn, const edata_t *edata) { - return edata_usize_get(edata); -} - void large_prof_info_get( tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { diff --git a/src/tcache.c b/src/tcache.c index 10fa7c21..fe210d27 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -111,16 +111,6 @@ tcache_gc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -uint64_t -tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { - return opt_tcache_gc_incr_bytes; -} - -uint64_t -tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - static inline void 
tcache_bin_fill_ctl_init(tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); From 176ea0a801338cae1b938c47f0d7dba7ffef0d25 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 18:12:23 -0700 Subject: [PATCH 373/395] Remove experimental.thread.activity_callback --- include/jemalloc/internal/activity_callback.h | 26 ------- include/jemalloc/internal/tsd_internals.h | 6 +- src/ctl.c | 33 +-------- src/peak_event.c | 13 ---- test/unit/mallctl.c | 73 +------------------ 5 files changed, 3 insertions(+), 148 deletions(-) delete mode 100644 include/jemalloc/internal/activity_callback.h diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h deleted file mode 100644 index 6745f1a2..00000000 --- a/include/jemalloc/internal/activity_callback.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H -#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H - -#include "jemalloc/internal/jemalloc_preamble.h" - -/* - * The callback to be executed "periodically", in response to some amount of - * allocator activity. - * - * This callback need not be computing any sort of peak (although that's the - * intended first use case), but we drive it from the peak counter, so it's - * keeps things tidy to keep it here. - * - * The calls to this thunk get driven by the peak_event module. 
- */ -#define ACTIVITY_CALLBACK_THUNK_INITIALIZER \ - { NULL, NULL } -typedef void (*activity_callback_t)( - void *uctx, uint64_t allocated, uint64_t deallocated); -typedef struct activity_callback_thunk_s activity_callback_thunk_t; -struct activity_callback_thunk_s { - activity_callback_t callback; - void *uctx; -}; - -#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index f675587d..53b58d0c 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -4,7 +4,6 @@ #define JEMALLOC_INTERNAL_TSD_INTERNALS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" @@ -84,8 +83,6 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, activity_callback_thunk_t, \ - activity_callback_thunk_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -105,8 +102,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* sec_shard */ (uint8_t) - 1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* peak */ PEAK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_INITIALIZER, diff --git a/src/ctl.c b/src/ctl.c index 4cac5608..89824d6a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -365,7 +365,6 @@ CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) -CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) 
CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) @@ -890,9 +889,6 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; -static const ctl_named_node_t experimental_thread_node[] = { - {NAME("activity_callback"), CTL(experimental_thread_activity_callback)}}; - static const ctl_named_node_t experimental_utilization_node[] = { {NAME("query"), CTL(experimental_utilization_query)}, {NAME("batch_query"), CTL(experimental_utilization_batch_query)}}; @@ -916,8 +912,7 @@ static const ctl_named_node_t experimental_node[] = { {NAME("arenas"), CHILD(indexed, experimental_arenas)}, {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, - {NAME("thread"), CHILD(named, experimental_thread)}}; + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}}; static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, @@ -4255,32 +4250,6 @@ label_return: return ret; } -static int -experimental_thread_activity_callback_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - - if (!config_stats) { - return ENOENT; - } - - activity_callback_thunk_t t_old = tsd_activity_callback_thunk_get(tsd); - READ(t_old, activity_callback_thunk_t); - - if (newp != NULL) { - /* - * This initialization is unnecessary. If it's omitted, though, - * clang gets confused and warns on the subsequent use of t_new. 
- */ - activity_callback_thunk_t t_new = {NULL, NULL}; - WRITE(t_new, activity_callback_thunk_t); - tsd_activity_callback_thunk_set(tsd, t_new); - } - ret = 0; -label_return: - return ret; -} - /* * Output six memory utilization entries for an input pointer, the first one of * type (void *) and the remaining five of type size_t, describing the following diff --git a/src/peak_event.c b/src/peak_event.c index e7f54dba..39f90b70 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -3,7 +3,6 @@ #include "jemalloc/internal/peak_event.h" -#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/peak.h" #include "jemalloc/internal/thread_event_registry.h" @@ -16,17 +15,6 @@ peak_event_update(tsd_t *tsd) { peak_update(peak, alloc, dalloc); } -static void -peak_event_activity_callback(tsd_t *tsd) { - activity_callback_thunk_t *thunk = tsd_activity_callback_thunkp_get( - tsd); - uint64_t alloc = tsd_thread_allocated_get(tsd); - uint64_t dalloc = tsd_thread_deallocated_get(tsd); - if (thunk->callback != NULL) { - thunk->callback(thunk->uctx, alloc, dalloc); - } -} - /* Set current state to zero. 
*/ void peak_event_zero(tsd_t *tsd) { @@ -55,7 +43,6 @@ peak_event_postponed_event_wait(tsd_t *tsd) { static void peak_event_handler(tsd_t *tsd) { peak_event_update(tsd); - peak_event_activity_callback(tsd); } static te_enabled_t diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4cd0225b..8f1d2a66 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1332,77 +1332,6 @@ TEST_BEGIN(test_thread_peak) { } TEST_END -typedef struct activity_test_data_s activity_test_data_t; -struct activity_test_data_s { - uint64_t obtained_alloc; - uint64_t obtained_dalloc; -}; - -static void -activity_test_callback(void *uctx, uint64_t alloc, uint64_t dalloc) { - activity_test_data_t *test_data = (activity_test_data_t *)uctx; - test_data->obtained_alloc = alloc; - test_data->obtained_dalloc = dalloc; -} - -TEST_BEGIN(test_thread_activity_callback) { - test_skip_if(!config_stats); - - const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; - size_t sz; - - uint64_t *allocatedp; - uint64_t *deallocatedp; - sz = sizeof(allocatedp); - err = mallctl("thread.allocatedp", &allocatedp, &sz, NULL, 0); - assert_d_eq(0, err, ""); - err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); - assert_d_eq(0, err, ""); - - activity_callback_thunk_t old_thunk = { - (activity_callback_t)111, (void *)222}; - - activity_test_data_t test_data = {333, 444}; - activity_callback_thunk_t new_thunk = { - &activity_test_callback, &test_data}; - - sz = sizeof(old_thunk); - err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, - &new_thunk, sizeof(new_thunk)); - assert_d_eq(0, err, ""); - - expect_true(old_thunk.callback == NULL, "Callback already installed"); - expect_true(old_thunk.uctx == NULL, "Callback data already installed"); - - ptr = mallocx(big_size, 0); - expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); - expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); - - free(ptr); - expect_u64_eq(test_data.obtained_alloc, *allocatedp, 
""); - expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); - - sz = sizeof(old_thunk); - new_thunk = (activity_callback_thunk_t){NULL, NULL}; - err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, - &new_thunk, sizeof(new_thunk)); - assert_d_eq(0, err, ""); - - expect_true(old_thunk.callback == &activity_test_callback, ""); - expect_true(old_thunk.uctx == &test_data, ""); - - /* Inserting NULL should have turned off tracking. */ - test_data.obtained_alloc = 333; - test_data.obtained_dalloc = 444; - ptr = mallocx(big_size, 0); - free(ptr); - expect_u64_eq(333, test_data.obtained_alloc, ""); - expect_u64_eq(444, test_data.obtained_dalloc, ""); -} -TEST_END - static unsigned nuser_thread_event_cb_calls; static void user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { @@ -1455,5 +1384,5 @@ main(void) { test_stats_arenas_hpa_shard_counters, test_stats_arenas_hpa_shard_slabs, test_hooks, test_hooks_exhaustion, test_thread_idle, test_thread_peak, - test_thread_activity_callback, test_thread_event_hook); + test_thread_event_hook); } From 513778bcb18f7e98073775d2b358674b14f7433f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 08:42:54 -0700 Subject: [PATCH 374/395] Fix off-by-one in arenas_bin_i_index and arenas_lextent_i_index bounds checks The index validation used > instead of >=, allowing access at index SC_NBINS (for bins) and SC_NSIZES-SC_NBINS (for lextents), which are one past the valid range. This caused out-of-bounds reads in bin_infos[] and sz_index2size_unsafe(). Add unit tests that verify the boundary indices return ENOENT. 
--- src/ctl.c | 4 ++-- test/unit/mallctl.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 89824d6a..bfc12469 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3250,7 +3250,7 @@ CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > SC_NBINS) { + if (i >= SC_NBINS) { return NULL; } return super_arenas_bin_i_node; @@ -3262,7 +3262,7 @@ CTL_RO_NL_GEN(arenas_lextent_i_size, static const ctl_named_node_t * arenas_lextent_i_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > SC_NSIZES - SC_NBINS) { + if (i >= SC_NSIZES - SC_NBINS) { return NULL; } return super_arenas_lextent_i_node; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8f1d2a66..6ab443af 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -956,6 +956,52 @@ TEST_BEGIN(test_arenas_bin_constants) { } TEST_END +TEST_BEGIN(test_arenas_bin_oob) { + size_t sz; + size_t result; + char buf[128]; + + /* + * Querying the bin at index SC_NBINS should fail because valid + * indices are [0, SC_NBINS). + */ + sz = sizeof(result); + malloc_snprintf( + buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)SC_NBINS); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds bin index SC_NBINS"); + + /* One below the boundary should succeed. 
*/ + malloc_snprintf( + buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)(SC_NBINS - 1)); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid bin index SC_NBINS-1"); +} +TEST_END + +TEST_BEGIN(test_arenas_lextent_oob) { + size_t sz; + size_t result; + char buf[128]; + unsigned nlextents = SC_NSIZES - SC_NBINS; + + /* + * Querying the lextent at index nlextents should fail because valid + * indices are [0, nlextents). + */ + sz = sizeof(result); + malloc_snprintf(buf, sizeof(buf), "arenas.lextent.%u.size", nlextents); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds lextent index"); + + /* Querying the last element (nlextents - 1) should succeed. */ + malloc_snprintf( + buf, sizeof(buf), "arenas.lextent.%u.size", nlextents - 1); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid lextent index"); +} +TEST_END + TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ do { \ @@ -1379,6 +1425,7 @@ main(void) { test_arena_i_dss, test_arena_i_name, test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, test_arenas_bin_constants, + test_arenas_bin_oob, test_arenas_lextent_oob, test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, From 87f9938de51be77946b02f0ed54cbd32a5ff055b Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:02:32 -0700 Subject: [PATCH 375/395] Fix duplicate "nactive_huge" JSON key in HPA shard stats output In both the full_slabs and empty_slabs JSON sections of HPA shard stats, "nactive_huge" was emitted twice instead of emitting "ndirty_huge" as the second entry. This caused ndirty_huge to be missing from the JSON output entirely. 
Add a unit test that verifies both sections contain "ndirty_huge". --- src/stats.c | 4 +-- test/unit/json_stats.c | 73 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 22b412bd..d906ade6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -981,7 +981,7 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_kv( emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv( - emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv( @@ -1022,7 +1022,7 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_kv( emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv( - emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv( diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index ea8a170b..eee717c7 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -237,7 +237,78 @@ TEST_BEGIN(test_json_stats_mutexes) { } TEST_END +/* + * Verify that hpa_shard JSON stats contain "ndirty_huge" key in both + * full_slabs and empty_slabs sections. A previous bug emitted duplicate + * "nactive_huge" instead of "ndirty_huge". + */ +TEST_BEGIN(test_hpa_shard_json_ndirty_huge) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + /* Do some allocation to create HPA state. 
*/ + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + /* "J" for JSON, include per-arena HPA stats. */ + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + /* + * Find "full_slabs" and check it contains "ndirty_huge". + */ + const char *full_slabs = strstr(sbuf.buf, "\"full_slabs\""); + if (full_slabs != NULL) { + const char *empty_slabs = strstr(full_slabs, "\"empty_slabs\""); + const char *search_end = empty_slabs != NULL + ? empty_slabs + : sbuf.buf + sbuf.len; + /* + * Search for "ndirty_huge" between full_slabs and + * empty_slabs. + */ + const char *ndirty = full_slabs; + bool found = false; + while (ndirty < search_end) { + ndirty = strstr(ndirty, "\"ndirty_huge\""); + if (ndirty != NULL && ndirty < search_end) { + found = true; + break; + } + break; + } + expect_true( + found, "full_slabs section should contain ndirty_huge key"); + } + + /* + * Find "empty_slabs" and check it contains "ndirty_huge". + */ + const char *empty_slabs = strstr(sbuf.buf, "\"empty_slabs\""); + if (empty_slabs != NULL) { + /* Find the end of the empty_slabs object. */ + const char *nonfull = strstr(empty_slabs, "\"nonfull_slabs\""); + const char *search_end = nonfull != NULL ? 
nonfull + : sbuf.buf + sbuf.len; + const char *ndirty = strstr(empty_slabs, "\"ndirty_huge\""); + bool found = (ndirty != NULL && ndirty < search_end); + expect_true(found, + "empty_slabs section should contain ndirty_huge key"); + } + + stats_buf_fini(&sbuf); + dallocx(p, MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { - return test(test_json_stats_mutexes); + return test_no_reentrancy(test_json_stats_mutexes, + test_hpa_shard_json_ndirty_huge); } From a0f2bdf91ddd4e5662790c7cd877052c9009441d Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:57:28 -0700 Subject: [PATCH 376/395] Fix missing negation in large_ralloc_no_move usize_min fallback The second expansion attempt in large_ralloc_no_move omitted the ! before large_ralloc_no_move_expand(), inverting the return value. On expansion failure, the function falsely reported success, making callers believe the allocation was expanded in-place when it was not. On expansion success, the function falsely reported failure, causing callers to unnecessarily allocate, copy, and free. Add unit test that verifies the return value matches actual size change. 
--- Makefile.in | 1 + include/jemalloc/internal/util.h | 3 ++ src/large.c | 2 +- test/unit/large_ralloc.c | 76 ++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 test/unit/large_ralloc.c diff --git a/Makefile.in b/Makefile.in index 459f98fb..435fc34d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -248,6 +248,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/json_stats.c \ + $(srcroot)test/unit/large_ralloc.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_conf_2.c \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index bf246c95..ecfa76b8 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -20,6 +20,9 @@ */ #define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +/* Number of elements in a fixed-size array. */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + /* cpp macro definition stringification. */ #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) diff --git a/src/large.c b/src/large.c index 56fa16ab..6ccf49d7 100644 --- a/src/large.c +++ b/src/large.c @@ -147,7 +147,7 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize - && large_ralloc_no_move_expand( + && !large_ralloc_no_move_expand( tsdn, edata, usize_min, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; diff --git a/test/unit/large_ralloc.c b/test/unit/large_ralloc.c new file mode 100644 index 00000000..1f08d125 --- /dev/null +++ b/test/unit/large_ralloc.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +/* + * Test that large_ralloc_no_move causes a failure (returns true) when + * in-place extent expansion cannot succeed for either usize_max or + * usize_min. + * + * A previous bug omitted the ! 
negation on the second extent expansion + * attempt (usize_min fallback), causing false success (return false) when + * the expansion actually failed. + */ +TEST_BEGIN(test_large_ralloc_no_move_expand_fail) { + /* + * Allocate two adjacent large objects in the same arena to block + * in-place expansion of the first one. + */ + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + size_t large_sz = SC_LARGE_MINCLASS; + /* Allocate several blocks to prevent expansion of the first. */ + void *blocks[8]; + for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) { + blocks[i] = mallocx(large_sz, flags); + expect_ptr_not_null(blocks[i], "Unexpected mallocx() failure"); + } + + /* + * Try to expand blocks[0] in place. Use usize_min < usize_max to + * exercise the fallback path. + */ + tsd_t *tsd = tsd_fetch(); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, blocks[0]); + expect_ptr_not_null(edata, "Unexpected edata lookup failure"); + + size_t oldusize = edata_usize_get(edata); + size_t usize_min = sz_s2u(oldusize + 1); + size_t usize_max = sz_s2u(oldusize * 2); + + /* Ensure min and max are in different size classes. */ + if (usize_min == usize_max) { + usize_max = sz_s2u(usize_min + 1); + } + + bool ret = large_ralloc_no_move( + tsd_tsdn(tsd), edata, usize_min, usize_max, false); + + /* + * With adjacent allocations blocking expansion, this should fail. + * The bug caused ret == false (success) even when expansion failed. + */ + if (!ret) { + /* + * Expansion might actually succeed if adjacent memory + * is free. Verify the size actually changed. 
+ */ + size_t newusize = edata_usize_get(edata); + expect_zu_ge(newusize, usize_min, + "Expansion reported success but size didn't change"); + } + + for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) { + dallocx(blocks[i], flags); + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_large_ralloc_no_move_expand_fail); +} From eab2b29736a3f499f7be1236950ed9aab57c4267 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:57:34 -0700 Subject: [PATCH 377/395] Fix off-by-one in stats_arenas_i_bins_j and stats_arenas_i_lextents_j bounds checks Same pattern as arenas_bin_i_index: used > instead of >= allowing access one past the end of bstats[] and lstats[] arrays. Add unit tests that verify boundary indices return ENOENT. --- src/ctl.c | 4 ++-- test/unit/mallctl.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index bfc12469..0b72086c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3998,7 +3998,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, static const ctl_named_node_t * stats_arenas_i_bins_j_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > SC_NBINS) { + if (j >= SC_NBINS) { return NULL; } return super_stats_arenas_i_bins_j_node; @@ -4022,7 +4022,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, static const ctl_named_node_t * stats_arenas_i_lextents_j_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > SC_NSIZES - SC_NBINS) { + if (j >= SC_NSIZES - SC_NBINS) { return NULL; } return super_stats_arenas_i_lextents_j_node; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6ab443af..11710c27 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1002,6 +1002,59 @@ TEST_BEGIN(test_arenas_lextent_oob) { } TEST_END +TEST_BEGIN(test_stats_arenas_bins_oob) { + test_skip_if(!config_stats); + size_t sz; + uint64_t result; + char buf[128]; + + uint64_t 
epoch = 1; + sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + /* SC_NBINS is one past the valid range. */ + sz = sizeof(result); + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc", + (unsigned)SC_NBINS); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds stats bin index"); + + /* SC_NBINS - 1 is valid. */ + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc", + (unsigned)(SC_NBINS - 1)); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid stats bin index"); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_lextents_oob) { + test_skip_if(!config_stats); + size_t sz; + uint64_t result; + char buf[128]; + unsigned nlextents = SC_NSIZES - SC_NBINS; + + uint64_t epoch = 1; + sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + /* nlextents is one past the valid range. */ + sz = sizeof(result); + malloc_snprintf( + buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc", nlextents); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds stats lextent index"); + + /* nlextents - 1 is valid. 
*/ + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc", + nlextents - 1); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid stats lextent index"); +} +TEST_END + TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ do { \ @@ -1426,6 +1479,7 @@ main(void) { test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, test_arenas_bin_constants, test_arenas_bin_oob, test_arenas_lextent_oob, + test_stats_arenas_bins_oob, test_stats_arenas_lextents_oob, test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, From c2d57040f0d281449febb9bb80287e63bfb271fe Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:02:59 -0700 Subject: [PATCH 378/395] Fix out-of-bounds write in malloc_vsnprintf when size is 0 When called with size==0, the else branch wrote to str[size-1] which is str[(size_t)-1], a massive out-of-bounds write. Standard vsnprintf allows size==0 to mean "compute length only, write nothing". Add unit test for the size==0 case. 
--- src/malloc_io.c | 2 +- test/unit/malloc_io.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/malloc_io.c b/src/malloc_io.c index 779cdc05..9716c668 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -692,7 +692,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { label_out: if (i < size) { str[i] = '\0'; - } else { + } else if (size != 0) { str[size - 1] = '\0'; } diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index f7895945..ee744a78 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -252,8 +252,26 @@ TEST_BEGIN(test_malloc_snprintf) { } TEST_END +TEST_BEGIN(test_malloc_snprintf_zero_size) { + char buf[8]; + size_t result; + + /* + * malloc_snprintf with size==0 should not write anything but should + * return the length that would have been written. A previous bug + * caused an out-of-bounds write via str[size - 1] when size was 0. + */ + memset(buf, 'X', sizeof(buf)); + result = malloc_snprintf(buf, 0, "%s", "hello"); + expect_zu_eq(result, 5, "Expected length 5 for \"hello\""); + /* buf should be untouched. */ + expect_c_eq(buf[0], 'X', "Buffer should not have been modified"); +} +TEST_END + int main(void) { return test(test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, - test_malloc_snprintf_truncated, test_malloc_snprintf); + test_malloc_snprintf_truncated, test_malloc_snprintf, + test_malloc_snprintf_zero_size); } From 3a8bee81f18bd241ba571a6a77c940c8f8cfcfb1 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:03:06 -0700 Subject: [PATCH 379/395] Fix pac_mapped stats inflation on allocation failure newly_mapped_size was set unconditionally in the ecache_alloc_grow fallback path, even when the allocation returned NULL. This inflated pac_mapped stats without a corresponding deallocation to correct them. Guard the assignment with an edata != NULL check, matching the pattern used in the batched allocation path above it. 
--- src/pac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pac.c b/src/pac.c index 86001139..ed0f77c2 100644 --- a/src/pac.c +++ b/src/pac.c @@ -198,7 +198,9 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, NULL, size, alignment, zero, guarded); - newly_mapped_size = size; + if (edata != NULL) { + newly_mapped_size = size; + } } if (config_stats && newly_mapped_size != 0) { From dd30c91eaaf02e5f347e37a49f99eae670b94c88 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:28 -0700 Subject: [PATCH 380/395] Fix wrong fallback value in os_page_detect when sysconf fails Returned LG_PAGE (log2 of page size, e.g. 12) instead of PAGE (actual page size, e.g. 4096) when sysconf(_SC_PAGESIZE) failed. This would cause os_page to be set to an absurdly small value, breaking all page-aligned operations. --- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 2a4f0093..5c12ae42 100644 --- a/src/pages.c +++ b/src/pages.c @@ -718,7 +718,7 @@ os_page_detect(void) { #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { - return LG_PAGE; + return PAGE; } return (size_t)result; #endif From 3f6e63e86a193e8a4d685480165812cac6d2350f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:36 -0700 Subject: [PATCH 381/395] Fix wrong type for malloc_read_fd return value in prof_stack_range Used size_t (unsigned) instead of ssize_t for the return value of malloc_read_fd, which returns -1 on error. With size_t, -1 becomes a huge positive value, bypassing the error check and corrupting the remaining byte count. 
--- src/prof_stack_range.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index b167b132..8ebcab8e 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -73,17 +73,21 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, } remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { + if (remaining < 0) { ret = errno; break; + } else if (remaining == 0) { + break; } line = buf; } else if (line == NULL) { /* case 1: no newline found in buf */ remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { + if (remaining < 0) { ret = errno; break; + } else if (remaining == 0) { + break; } line = memchr(buf, '\n', remaining); if (line != NULL) { @@ -99,11 +103,13 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, remaining); /* copy remaining characters to start of buf */ line = buf; - size_t count = malloc_read_fd( + ssize_t count = malloc_read_fd( fd, buf + remaining, sizeof(buf) - remaining); - if (count <= 0) { + if (count < 0) { ret = errno; break; + } else if (count == 0) { + break; } remaining += From 675ab079e7e6f08a74727ec53569ec2db578d515 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:43 -0700 Subject: [PATCH 382/395] Fix missing release of acquired neighbor edata in extent_try_coalesce_impl When emap_try_acquire_edata_neighbor returned a non-NULL neighbor but the size check failed, the neighbor was never released from extent_state_merging, making it permanently invisible to future allocation and coalescing operations. Release the neighbor when it doesn't meet the size requirement, matching the pattern used in extent_recycle_extract. 
--- src/extent.c | 44 +++++++++++++++++++++++++++----------------- test/unit/pa.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/src/extent.c b/src/extent.c index 4b927191..118c8785 100644 --- a/src/extent.c +++ b/src/extent.c @@ -916,15 +916,20 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; - if (next != NULL && edata_size_get(next) <= max_next_neighbor) { - if (!extent_coalesce( - tsdn, pac, ehooks, ecache, edata, next, true)) { - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; + if (next != NULL) { + if (edata_size_get(next) > max_next_neighbor) { + emap_release_edata( + tsdn, pac->emap, next, ecache->state); + } else { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, + edata, next, true)) { + if (ecache->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return edata; + } + again = true; } - again = true; } } @@ -934,16 +939,21 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t max_prev_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; - if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { - if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false)) { - edata = prev; - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; + if (prev != NULL) { + if (edata_size_get(prev) > max_prev_neighbor) { + emap_release_edata( + tsdn, pac->emap, prev, ecache->state); + } else { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, + edata, prev, false)) { + edata = prev; + if (ecache->delay_coalesce) { + /* Do minimal coalescing. 
*/ + *coalesced = true; + return edata; + } + again = true; } - again = true; } } } while (again); diff --git a/test/unit/pa.c b/test/unit/pa.c index 8552225f..c1562d7b 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -121,7 +121,52 @@ TEST_BEGIN(test_alloc_free_purge_thds) { } TEST_END +TEST_BEGIN(test_failed_coalesce_releases_neighbor) { + test_skip_if(!maps_coalesce); + + test_data_t *test_data = init_test_data(-1, -1); + size_t old_lg_extent_max_active_fit = opt_lg_extent_max_active_fit; + opt_lg_extent_max_active_fit = 0; + + bool deferred_work_generated = false; + size_t unit = SC_LARGE_MINCLASS; + size_t alloc_size = 4 * unit; + edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, alloc_size, + PAGE, + /* slab */ false, sz_size2index(alloc_size), /* zero */ false, + /* guarded */ false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected pa_alloc() failure"); + + void *tail_addr = (void *)((uintptr_t)edata_base_get(edata) + unit); + expect_false(pa_shrink(TSDN_NULL, &test_data->shard, edata, alloc_size, + unit, sz_size2index(unit), &deferred_work_generated), + "Unexpected pa_shrink() failure"); + + edata_t *tail = emap_edata_lookup( + TSDN_NULL, &test_data->emap, tail_addr); + expect_ptr_not_null(tail, "Expected dirty tail extent after shrink"); + expect_ptr_eq( + edata_base_get(tail), tail_addr, "Unexpected tail extent address"); + expect_zu_eq( + edata_size_get(tail), 3 * unit, "Unexpected tail extent size"); + expect_d_eq(edata_state_get(tail), extent_state_dirty, + "Expected tail extent to start dirty"); + + pa_dalloc( + TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); + + tail = emap_edata_lookup(TSDN_NULL, &test_data->emap, tail_addr); + expect_ptr_not_null( + tail, "Expected oversized dirty neighbor to remain discoverable"); + expect_d_eq(edata_state_get(tail), extent_state_dirty, + "Failed coalesce must release oversized dirty neighbor"); + + opt_lg_extent_max_active_fit = old_lg_extent_max_active_fit; +} 
+TEST_END + int main(void) { - return test(test_alloc_free_purge_thds); + return test( + test_alloc_free_purge_thds, test_failed_coalesce_releases_neighbor); } From 234404d324458d4404ef382742741cb4ffbcf921 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:06:15 -0700 Subject: [PATCH 383/395] Fix wrong loop variable for array index in sz_boot_pind2sz_tab The sentinel fill loop used sz_pind2sz_tab[pind] (constant) instead of sz_pind2sz_tab[i] (loop variable), writing only to the first entry repeatedly and leaving subsequent entries uninitialized. --- src/sz.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sz.c b/src/sz.c index 4a4c057d..da92f2b4 100644 --- a/src/sz.c +++ b/src/sz.c @@ -65,7 +65,7 @@ sz_boot_pind2sz_tab(const sc_data_t *sc_data) { } } for (int i = pind; i <= (int)SC_NPSIZES; i++) { - sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; + sz_pind2sz_tab[i] = sc_data->large_maxclass + PAGE; } } @@ -93,7 +93,7 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; size_t dst_ind = 0; for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; - sc_ind++) { + sc_ind++) { const sc_t *sc = &sc_data->sc[sc_ind]; size_t sz = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); From 2fceece256c0a01a28743652ce3e5cc67723e453 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:06:22 -0700 Subject: [PATCH 384/395] Fix extra size argument in edata_init call in extent_alloc_dss An extra 'size' argument was passed where 'slab' (false) should be, shifting all subsequent arguments: slab got size (nonzero=true), szind got false (0), and sn got SC_NSIZES instead of a proper serial number from extent_sn_next(). Match the correct pattern used by the gap edata_init call above. 
--- src/extent_dss.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 3f7a15d0..c7c34207 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -153,11 +153,14 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), - gap_addr_page, gap_size_page, false, - SC_NSIZES, + gap_addr_page, gap_size_page, + /* slab */ false, + /* szind */ SC_NSIZES, extent_sn_next(&arena->pa_shard.pac), - extent_state_active, false, true, - EXTENT_PAI_PAC, head_state); + extent_state_active, + /* zeroed */ false, + /* committed */ true, + /* pai */ EXTENT_PAI_PAC, head_state); } /* * Compute the address just past the end of the desired @@ -203,9 +206,16 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena); edata_init(&edata, arena_ind_get(arena), - ret, size, size, false, SC_NSIZES, - extent_state_active, false, true, - EXTENT_PAI_PAC, head_state); + ret, size, + /* slab */ false, + /* szind */ SC_NSIZES, + extent_sn_next( + &arena->pa_shard.pac), + extent_state_active, + /* zeroed */ false, + /* committed */ true, + /* pai */ EXTENT_PAI_PAC, + head_state); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); From 5904a421878b31d6a5ec674027b35db63e64537d Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:07:05 -0700 Subject: [PATCH 385/395] Fix memory leak of old curr_reg on san_bump_grow_locked failure When san_bump_grow_locked fails, it sets sba->curr_reg to NULL. The old curr_reg (saved in to_destroy) was never freed or restored, leaking the virtual memory extent. Restore sba->curr_reg from to_destroy on failure so the old region remains usable. 
--- src/san_bump.c | 1 + test/unit/san_bump.c | 89 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/san_bump.c b/src/san_bump.c index 09ed18ca..11031290 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -31,6 +31,7 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, bool err = san_bump_grow_locked( tsdn, sba, pac, ehooks, guarded_size); if (err) { + sba->curr_reg = to_destroy; goto label_err; } } else { diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index 9aa0210e..54d8583d 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -4,6 +4,50 @@ #include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/san_bump.h" +static extent_hooks_t *san_bump_default_hooks; +static extent_hooks_t san_bump_hooks; +static bool fail_retained_alloc; +static unsigned retained_alloc_fail_calls; + +static void * +san_bump_fail_alloc_hook(extent_hooks_t *UNUSED extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind) { + if (fail_retained_alloc && new_addr == NULL + && size >= SBA_RETAINED_ALLOC_SIZE) { + retained_alloc_fail_calls++; + return NULL; + } + return san_bump_default_hooks->alloc(san_bump_default_hooks, new_addr, + size, alignment, zero, commit, arena_ind); +} + +static void +install_san_bump_fail_alloc_hooks(unsigned arena_ind) { + size_t hooks_mib[3]; + size_t hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + size_t old_size = sizeof(extent_hooks_t *); + size_t new_size = sizeof(extent_hooks_t *); + extent_hooks_t *new_hooks; + extent_hooks_t *old_hooks; + + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, NULL, 0), + 0, "Unexpected extent_hooks error"); + + san_bump_default_hooks = old_hooks; + san_bump_hooks = 
*old_hooks; + san_bump_hooks.alloc = san_bump_fail_alloc_hook; + new_hooks = &san_bump_hooks; + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks install failure"); +} + TEST_BEGIN(test_san_bump_alloc) { test_skip_if(!maps_coalesce || !opt_retain); @@ -69,6 +113,48 @@ TEST_BEGIN(test_san_bump_alloc) { } TEST_END +TEST_BEGIN(test_failed_grow_preserves_curr_reg) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + install_san_bump_fail_alloc_hooks(arena_ind); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t small_alloc_size = PAGE * 16; + edata_t *edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + small_alloc_size, /* zero */ false); + expect_ptr_not_null(edata, "Initial san_bump allocation failed"); + expect_ptr_not_null(sba.curr_reg, + "Expected retained region remainder after initial allocation"); + + fail_retained_alloc = true; + retained_alloc_fail_calls = 0; + + edata_t *failed = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + SBA_RETAINED_ALLOC_SIZE, /* zero */ false); + expect_ptr_null(failed, "Expected retained grow allocation failure"); + expect_u_eq(retained_alloc_fail_calls, 1, + "Expected exactly one failed retained allocation attempt"); + + edata_t *reused = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + small_alloc_size, /* zero */ false); + expect_ptr_not_null( + reused, "Expected allocator to reuse preexisting current region"); + expect_u_eq(retained_alloc_fail_calls, 1, + "Reuse path should not attempt another retained grow allocation"); + + fail_retained_alloc = false; +} +TEST_END + TEST_BEGIN(test_large_alloc_size) { test_skip_if(!maps_coalesce || !opt_retain); @@ -105,5 +191,6 @@ TEST_END int 
main(void) { - return test(test_san_bump_alloc, test_large_alloc_size); + return test(test_san_bump_alloc, test_failed_grow_preserves_curr_reg, + test_large_alloc_size); } From 3ac9f96158f3b095496e260259a3c32857eafd28 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 20:33:49 -0700 Subject: [PATCH 386/395] Run clang-format on test/unit/conf_parse.c --- test/unit/conf_parse.c | 58 ++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c index b3fedb40..eb107865 100644 --- a/test/unit/conf_parse.c +++ b/test/unit/conf_parse.c @@ -27,8 +27,8 @@ TEST_END TEST_BEGIN(test_conf_handle_unsigned_in_range) { uintmax_t result = 0; - bool err = conf_handle_unsigned("100", sizeof("100") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "100", sizeof("100") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed for in-range value"); expect_u64_eq((uint64_t)result, 100, "result should be 100"); } @@ -36,44 +36,43 @@ TEST_END TEST_BEGIN(test_conf_handle_unsigned_clip_max) { uintmax_t result = 0; - bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "9999", sizeof("9999") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 2048, - "result should be clipped to max 2048"); + expect_u64_eq( + (uint64_t)result, 2048, "result should be clipped to max 2048"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_clip_min) { uintmax_t result = 0; - bool err = conf_handle_unsigned("0", sizeof("0") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "0", sizeof("0") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 1, - "result should be clipped to min 1"); + 
expect_u64_eq((uint64_t)result, 1, "result should be clipped to min 1"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { uintmax_t result = 0; - bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, - 1, 2048, true, true, false, &result); + bool err = conf_handle_unsigned( + "9999", sizeof("9999") - 1, 1, 2048, true, true, false, &result); expect_true(err, "Should fail for out-of-range value without clip"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_invalid) { uintmax_t result = 0; - bool err = conf_handle_unsigned("abc", sizeof("abc") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "abc", sizeof("abc") - 1, 1, 2048, true, true, true, &result); expect_true(err, "Should fail for non-numeric input"); } TEST_END TEST_BEGIN(test_conf_handle_signed_valid) { intmax_t result = 0; - bool err = conf_handle_signed("5000", sizeof("5000") - 1, - -1, INTMAX_MAX, true, false, false, &result); + bool err = conf_handle_signed("5000", sizeof("5000") - 1, -1, + INTMAX_MAX, true, false, false, &result); expect_false(err, "Should succeed for valid value"); expect_d64_eq((int64_t)result, 5000, "result should be 5000"); } @@ -81,8 +80,8 @@ TEST_END TEST_BEGIN(test_conf_handle_signed_negative) { intmax_t result = 0; - bool err = conf_handle_signed("-1", sizeof("-1") - 1, - -1, INTMAX_MAX, true, false, false, &result); + bool err = conf_handle_signed("-1", sizeof("-1") - 1, -1, INTMAX_MAX, + true, false, false, &result); expect_false(err, "Should succeed for -1"); expect_d64_eq((int64_t)result, -1, "result should be -1"); } @@ -90,8 +89,8 @@ TEST_END TEST_BEGIN(test_conf_handle_signed_out_of_range) { intmax_t result = 0; - bool err = conf_handle_signed("5000", sizeof("5000") - 1, - -1, 4999, true, true, false, &result); + bool err = conf_handle_signed( + "5000", sizeof("5000") - 1, -1, 4999, true, true, false, &result); expect_true(err, "Should fail for out-of-range value"); } TEST_END @@ -101,13 +100,14 @@ 
TEST_BEGIN(test_conf_handle_char_p) { bool err; /* Normal copy. */ - err = conf_handle_char_p("hello", sizeof("hello") - 1, buf, sizeof(buf)); + err = conf_handle_char_p( + "hello", sizeof("hello") - 1, buf, sizeof(buf)); expect_false(err, "Should succeed"); expect_str_eq(buf, "hello", "Should copy string"); /* Truncation. */ - err = conf_handle_char_p("longstring", sizeof("longstring") - 1, - buf, sizeof(buf)); + err = conf_handle_char_p( + "longstring", sizeof("longstring") - 1, buf, sizeof(buf)); expect_false(err, "Should succeed even when truncating"); expect_str_eq(buf, "longstr", "Should truncate to dest_sz - 1"); } @@ -115,16 +115,12 @@ TEST_END int main(void) { - return test(test_conf_handle_bool_true, - test_conf_handle_bool_false, - test_conf_handle_bool_invalid, - test_conf_handle_unsigned_in_range, + return test(test_conf_handle_bool_true, test_conf_handle_bool_false, + test_conf_handle_bool_invalid, test_conf_handle_unsigned_in_range, test_conf_handle_unsigned_clip_max, test_conf_handle_unsigned_clip_min, test_conf_handle_unsigned_no_clip_reject, - test_conf_handle_unsigned_invalid, - test_conf_handle_signed_valid, + test_conf_handle_unsigned_invalid, test_conf_handle_signed_valid, test_conf_handle_signed_negative, - test_conf_handle_signed_out_of_range, - test_conf_handle_char_p); + test_conf_handle_signed_out_of_range, test_conf_handle_char_p); } From b507644cb084d095917aea6e2573c702caff3e5a Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 12:27:00 -0700 Subject: [PATCH 387/395] Fix conf_handle_char_p zero-sized dest and remove unused conf_handle_unsigned --- include/jemalloc/internal/conf.h | 16 +++----- src/conf.c | 41 ++++----------------- test/unit/conf_parse.c | 63 +++++++------------------------- 3 files changed, 27 insertions(+), 93 deletions(-) diff --git a/include/jemalloc/internal/conf.h b/include/jemalloc/internal/conf.h index 21661955..26983ee9 100644 --- a/include/jemalloc/internal/conf.h +++ 
b/include/jemalloc/internal/conf.h @@ -9,19 +9,15 @@ void malloc_abort_invalid_conf(void); #ifdef JEMALLOC_JET extern bool had_conf_error; + bool conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p); -void conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); +void conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen); bool conf_handle_bool(const char *v, size_t vlen, bool *result); -bool conf_handle_unsigned(const char *v, size_t vlen, - uintmax_t min, uintmax_t max, bool check_min, bool check_max, - bool clip, uintmax_t *result); -bool conf_handle_signed(const char *v, size_t vlen, - intmax_t min, intmax_t max, bool check_min, bool check_max, - bool clip, intmax_t *result); -bool conf_handle_char_p(const char *v, size_t vlen, - char *dest, size_t dest_sz); +bool conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max, + bool check_min, bool check_max, bool clip, intmax_t *result); +bool conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz); #endif #endif /* JEMALLOC_INTERNAL_CONF_H */ diff --git a/src/conf.c b/src/conf.c index 8a23bda6..65abcd25 100644 --- a/src/conf.c +++ b/src/conf.c @@ -254,36 +254,8 @@ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function") JET_EXTERN bool -conf_handle_unsigned(const char *v, size_t vlen, - uintmax_t min, uintmax_t max, bool check_min, bool check_max, - bool clip, uintmax_t *result) { - char *end; - set_errno(0); - uintmax_t mv = (uintmax_t)malloc_strtoumax(v, &end, 0); - if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { - return true; - } - if (clip) { - if (check_min && mv < min) { - *result = min; - } else if (check_max && mv > max) { - *result = max; - } else { - *result = mv; - } - } else { - if ((check_min && mv < min) || (check_max && mv > max)) { - return true; - } - *result = mv; - } - return false; -} - -JET_EXTERN bool 
-conf_handle_signed(const char *v, size_t vlen, - intmax_t min, intmax_t max, bool check_min, bool check_max, - bool clip, intmax_t *result) { +conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max, + bool check_min, bool check_max, bool clip, intmax_t *result) { char *end; set_errno(0); intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0); @@ -309,6 +281,9 @@ conf_handle_signed(const char *v, size_t vlen, JET_EXTERN bool conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz) { + if (dest_sz == 0) { + return false; + } size_t cpylen = (vlen <= dest_sz - 1) ? vlen : dest_sz - 1; strncpy(dest, v, cpylen); dest[cpylen] = '\0'; @@ -473,11 +448,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], continue; } - while (*opts != '\0' - && !conf_next(&opts, &k, &klen, &v, &vlen)) { + while ( + *opts != '\0' && !conf_next(&opts, &k, &klen, &v, &vlen)) { #define CONF_ERROR(msg, k, klen, v, vlen) \ if (!initial_call) { \ - conf_error(msg, k, klen, v, vlen); \ + conf_error(msg, k, klen, v, vlen); \ cur_opt_valid = false; \ } #define CONF_CONTINUE \ diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c index eb107865..448cc84a 100644 --- a/test/unit/conf_parse.c +++ b/test/unit/conf_parse.c @@ -25,50 +25,6 @@ TEST_BEGIN(test_conf_handle_bool_invalid) { } TEST_END -TEST_BEGIN(test_conf_handle_unsigned_in_range) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "100", sizeof("100") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed for in-range value"); - expect_u64_eq((uint64_t)result, 100, "result should be 100"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_clip_max) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "9999", sizeof("9999") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed with clipping"); - expect_u64_eq( - (uint64_t)result, 2048, "result should be clipped to max 2048"); -} -TEST_END - 
-TEST_BEGIN(test_conf_handle_unsigned_clip_min) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "0", sizeof("0") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 1, "result should be clipped to min 1"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "9999", sizeof("9999") - 1, 1, 2048, true, true, false, &result); - expect_true(err, "Should fail for out-of-range value without clip"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_invalid) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "abc", sizeof("abc") - 1, 1, 2048, true, true, true, &result); - expect_true(err, "Should fail for non-numeric input"); -} -TEST_END - TEST_BEGIN(test_conf_handle_signed_valid) { intmax_t result = 0; bool err = conf_handle_signed("5000", sizeof("5000") - 1, -1, @@ -113,14 +69,21 @@ TEST_BEGIN(test_conf_handle_char_p) { } TEST_END +TEST_BEGIN(test_conf_handle_char_p_zero_dest_sz) { + char buf[4] = {'X', 'Y', 'Z', '\0'}; + bool err; + + err = conf_handle_char_p("abc", sizeof("abc") - 1, buf, 0); + expect_false(err, "Should succeed for zero-sized destination"); + expect_c_eq(buf[0], 'X', "Zero-sized destination must not be modified"); +} +TEST_END + int main(void) { return test(test_conf_handle_bool_true, test_conf_handle_bool_false, - test_conf_handle_bool_invalid, test_conf_handle_unsigned_in_range, - test_conf_handle_unsigned_clip_max, - test_conf_handle_unsigned_clip_min, - test_conf_handle_unsigned_no_clip_reject, - test_conf_handle_unsigned_invalid, test_conf_handle_signed_valid, + test_conf_handle_bool_invalid, test_conf_handle_signed_valid, test_conf_handle_signed_negative, - test_conf_handle_signed_out_of_range, test_conf_handle_char_p); + test_conf_handle_signed_out_of_range, test_conf_handle_char_p, + test_conf_handle_char_p_zero_dest_sz); } From 
a47fa33b5a7d91ab0218436a75b652a2b65588c9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 20:37:23 -0700 Subject: [PATCH 388/395] Run clang-format on test/unit/tcache_max.c --- test/unit/tcache_max.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index d57b2d3b..653563ca 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -195,9 +195,9 @@ TEST_BEGIN(test_tcache_max) { global_test = true; for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; - alloc_option++) { + alloc_option++) { for (dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { /* opt.tcache_max set to 1024 in tcache_max.sh. */ test_tcache_max_impl(1024, alloc_option, dalloc_option); } @@ -318,9 +318,9 @@ tcache_check(void *arg) { expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; - alloc_option < alloc_option_end; alloc_option++) { + alloc_option < alloc_option_end; alloc_option++) { for (unsigned dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { test_tcache_max_impl( new_tcache_max, alloc_option, dalloc_option); } From 3cc56d325c15cdb7d6047ed513ab908121c66698 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:30:58 -0700 Subject: [PATCH 389/395] Fix large alloc nrequests under-counting on cache misses --- include/jemalloc/internal/tcache_inlines.h | 6 +-- test/unit/tcache_max.c | 47 +++++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 6bd1b339..5f8ed317 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ 
b/include/jemalloc/internal/tcache_inlines.h @@ -163,10 +163,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(usize <= tcache_max_get(tcache->tcache_slow)); memset(ret, 0, usize); } + } - if (config_stats) { - bin->tstats.nrequests++; - } + if (config_stats) { + bin->tstats.nrequests++; } return ret; diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 653563ca..ab54da39 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -206,6 +206,50 @@ TEST_BEGIN(test_tcache_max) { } TEST_END +TEST_BEGIN(test_large_tcache_nrequests_on_miss) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); + + size_t large; + size_t sz = sizeof(large); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, (void *)&large, + sizeof(large)), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); + + tsd_t *tsd = tsd_fetch(); + expect_ptr_not_null(tsd, "Unexpected tsd_fetch() failure"); + tcache_t *tcache = tcache_get(tsd); + expect_ptr_not_null(tcache, "Expected auto tcache"); + + szind_t binind = sz_size2index(large); + expect_true(binind >= SC_NBINS, "Expected large size class"); + cache_bin_t *bin = &tcache->bins[binind]; + bin->tstats.nrequests = 0; + + void *p = mallocx(large, 0); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_u64_eq(bin->tstats.nrequests, 1, + "Large tcache miss should count as one request"); + + dallocx(p, 0); + p = mallocx(large, 0); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_u64_eq(bin->tstats.nrequests, 2, + "Large tcache hit should increment request count again"); + + dallocx(p, 0); + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected 
tcache flush failure"); +} +TEST_END + static size_t tcache_max2nbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; @@ -358,5 +402,6 @@ TEST_END int main(void) { - return test(test_tcache_max, test_thread_tcache_max); + return test(test_tcache_max, test_large_tcache_nrequests_on_miss, + test_thread_tcache_max); } From 6281482c395fdbf721ff1f09f531315744446b35 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:50:11 -0700 Subject: [PATCH 390/395] Nest HPA SEC stats inside hpa_shard JSON --- src/stats.c | 3 +- test/unit/json_stats.c | 77 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index d906ade6..fa018ea0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1113,9 +1113,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { - stats_arena_hpa_shard_sec_print(emitter, i); - emitter_json_object_kv_begin(emitter, "hpa_shard"); + stats_arena_hpa_shard_sec_print(emitter, i); stats_arena_hpa_shard_counters_print(emitter, i, uptime); stats_arena_hpa_shard_slabs_print(emitter, i); emitter_json_object_end(emitter); /* End "hpa_shard" */ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index eee717c7..d6cbc50c 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -185,6 +185,41 @@ static const char *arena_mutex_names[] = {"large", "extent_avail", static const size_t num_arena_mutexes = sizeof(arena_mutex_names) / sizeof(arena_mutex_names[0]); +static const char * +json_find_object_end(const char *object_begin) { + int depth = 0; + for (const char *cur = object_begin; *cur != '\0'; cur++) { + if (*cur == '{') { + depth++; + } else if (*cur == '}') { + depth--; + if (depth == 0) { + return cur; + } + } + } + return NULL; +} + +static const char * +json_find_previous_hpa_shard_object( + const char *json, const char *pos, const char 
**object_end) { + *object_end = NULL; + const char *found = NULL; + const char *cur = json; + const char *next; + + while ((next = strstr(cur, "\"hpa_shard\":{")) != NULL && next < pos) { + found = strchr(next, '{'); + cur = next + 1; + } + if (found == NULL) { + return NULL; + } + *object_end = json_find_object_end(found); + return found; +} + TEST_BEGIN(test_json_stats_mutexes) { test_skip_if(!config_stats); @@ -307,8 +342,48 @@ TEST_BEGIN(test_hpa_shard_json_ndirty_huge) { } TEST_END +TEST_BEGIN(test_hpa_shard_json_contains_sec_stats) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + const char *sec_bytes = strstr(sbuf.buf, "\"sec_bytes\""); + expect_ptr_not_null(sec_bytes, "JSON output should contain sec_bytes"); + const char *hpa_shard_end = NULL; + const char *hpa_shard = json_find_previous_hpa_shard_object( + sbuf.buf, sec_bytes, &hpa_shard_end); + expect_ptr_not_null(hpa_shard, + "sec_bytes should be associated with an hpa_shard JSON object"); + expect_ptr_not_null(hpa_shard_end, + "Could not find end of enclosing hpa_shard JSON object"); + expect_true(sec_bytes != NULL && sec_bytes < hpa_shard_end, + "sec_bytes should be nested inside hpa_shard JSON object"); + const char *sec_hits = strstr(hpa_shard, "\"sec_hits\""); + expect_true(sec_hits != NULL && sec_hits < hpa_shard_end, + "sec_hits should be nested inside hpa_shard JSON object"); + const char *sec_misses = strstr(hpa_shard, "\"sec_misses\""); + expect_true(sec_misses != NULL && sec_misses < hpa_shard_end, + "sec_misses should be nested inside hpa_shard JSON object"); + + stats_buf_fini(&sbuf); + dallocx(p, 
MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { return test_no_reentrancy(test_json_stats_mutexes, - test_hpa_shard_json_ndirty_huge); + test_hpa_shard_json_ndirty_huge, + test_hpa_shard_json_contains_sec_stats); } From db7d99703d41e58ba2932e98a6e12dd377028231 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:54:15 -0700 Subject: [PATCH 391/395] Add TODO to benchmark possibly better policy --- src/sec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sec.c b/src/sec.c index 5f65362f..a3254537 100644 --- a/src/sec.c +++ b/src/sec.c @@ -130,13 +130,17 @@ sec_multishard_trylock_alloc( cur_shard = 0; } } - /* No bin had alloc or had the extent */ + /* + * TODO: Benchmark whether it is worth blocking on all shards here before + * declaring a miss. That could recover more remote-shard hits under + * contention, but it also changes the allocation latency policy. + */ assert(cur_shard == sec_shard_pick(tsdn, sec)); bin = sec_bin_pick(sec, cur_shard, pszind); malloc_mutex_lock(tsdn, &bin->mtx); edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); if (edata == NULL) { - /* Only now we know it is a miss */ + /* Only now we know it is a miss. 
*/ bin->stats.nmisses++; } malloc_mutex_unlock(tsdn, &bin->mtx); From f265645d02f0bde59833c46977b66acd94dec42e Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:58:27 -0700 Subject: [PATCH 392/395] Emit retained HPA slab stats in JSON --- src/stats.c | 6 ++ test/unit/json_stats.c | 122 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index fa018ea0..82458fec 100644 --- a/src/stats.c +++ b/src/stats.c @@ -988,6 +988,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv( emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ /* Next, empty slab stats. */ @@ -1029,6 +1031,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv( emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ /* Last, nonfull slab stats. 
*/ @@ -1103,6 +1107,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { &nactive_nonhuge); emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index d6cbc50c..c206974b 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -196,6 +196,28 @@ json_find_object_end(const char *object_begin) { if (depth == 0) { return cur; } + if (depth < 0) { + return NULL; + } + } + } + return NULL; +} + +static const char * +json_find_array_end(const char *array_begin) { + int depth = 0; + for (const char *cur = array_begin; *cur != '\0'; cur++) { + if (*cur == '[') { + depth++; + } else if (*cur == ']') { + depth--; + if (depth == 0) { + return cur; + } + if (depth < 0) { + return NULL; + } } } return NULL; @@ -220,6 +242,52 @@ json_find_previous_hpa_shard_object( return found; } +static const char * +json_find_named_object( + const char *json, const char *key, const char **object_end) { + *object_end = NULL; + char search_key[128]; + size_t written = malloc_snprintf( + search_key, sizeof(search_key), "\"%s\":{", key); + if (written >= sizeof(search_key)) { + return NULL; + } + + const char *object_begin = strstr(json, search_key); + if (object_begin == NULL) { + return NULL; + } + object_begin = strchr(object_begin, '{'); + if (object_begin == NULL) { + return NULL; + } + *object_end = json_find_object_end(object_begin); + return object_begin; +} + +static const char * +json_find_named_array( + const char *json, const char *key, const char **array_end) { + *array_end = NULL; + char search_key[128]; + size_t written = malloc_snprintf( + search_key, sizeof(search_key), "\"%s\":[", key); + if (written >= sizeof(search_key)) { + return NULL; + } + + const char *array_begin = 
strstr(json, search_key); + if (array_begin == NULL) { + return NULL; + } + array_begin = strchr(array_begin, '['); + if (array_begin == NULL) { + return NULL; + } + *array_end = json_find_array_end(array_begin); + return array_begin; +} + TEST_BEGIN(test_json_stats_mutexes) { test_skip_if(!config_stats); @@ -381,9 +449,61 @@ TEST_BEGIN(test_hpa_shard_json_contains_sec_stats) { } TEST_END +TEST_BEGIN(test_hpa_shard_json_contains_retained_stats) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + const char *full_slabs_end = NULL; + const char *full_slabs = json_find_named_object( + sbuf.buf, "full_slabs", &full_slabs_end); + expect_ptr_not_null( + full_slabs, "JSON output should contain full_slabs"); + const char *full_retained = strstr(full_slabs, "\"nretained_nonhuge\""); + expect_true(full_retained != NULL && full_retained < full_slabs_end, + "full_slabs should contain nretained_nonhuge"); + + const char *empty_slabs_end = NULL; + const char *empty_slabs = json_find_named_object( + sbuf.buf, "empty_slabs", &empty_slabs_end); + expect_ptr_not_null( + empty_slabs, "JSON output should contain empty_slabs"); + const char *empty_retained = strstr( + empty_slabs, "\"nretained_nonhuge\""); + expect_true(empty_retained != NULL && empty_retained < empty_slabs_end, + "empty_slabs should contain nretained_nonhuge"); + + const char *nonfull_slabs_end = NULL; + const char *nonfull_slabs = json_find_named_array( + sbuf.buf, "nonfull_slabs", &nonfull_slabs_end); + expect_ptr_not_null( + nonfull_slabs, "JSON output should contain nonfull_slabs"); + const char *nonfull_retained = strstr( + nonfull_slabs, 
"\"nretained_nonhuge\""); + expect_true( + nonfull_retained != NULL && nonfull_retained < nonfull_slabs_end, + "nonfull_slabs should contain nretained_nonhuge"); + + stats_buf_fini(&sbuf); + dallocx(p, MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { return test_no_reentrancy(test_json_stats_mutexes, test_hpa_shard_json_ndirty_huge, - test_hpa_shard_json_contains_sec_stats); + test_hpa_shard_json_contains_sec_stats, + test_hpa_shard_json_contains_retained_stats); } From 6515df8cec7fe50f6b45069f82bdf685171f9ee7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 7 Apr 2026 10:41:44 -0700 Subject: [PATCH 393/395] Documentation updates (#2869) * Document new mallctl interfaces added since 5.3.0 Add documentation for the following new mallctl entries: - opt.debug_double_free_max_scan: double-free detection scan limit - opt.prof_bt_max: max profiling backtrace depth - opt.disable_large_size_classes: page-aligned large allocations - opt.process_madvise_max_batch: batched process_madvise purging - thread.tcache.max: per-thread tcache_max control - thread.tcache.ncached_max.read_sizeclass: query ncached_max - thread.tcache.ncached_max.write: set ncached_max per size range - arena..name: get/set arena names - arenas.hugepage: hugepage size - approximate_stats.active: lightweight active bytes estimate Remove config.prof_frameptr since it still needs more development and is still experimental. Co-authored-by: lexprfuncall --- doc/jemalloc.xml.in | 157 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 147 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2a8573b8..692658f8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -897,16 +897,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", during build configuration. - - - config.prof_frameptr - (bool) - r- - - was specified - during build configuration. - - config.stats @@ -1419,6 +1409,17 @@ malloc_conf = "xmalloc:true";]]> extent hooks. 
+ + + opt.prof_bt_max + (unsigned) + r- + [] + + Maximum number of stack frames to record in profiling + backtraces. The default is 128. + + opt.prof @@ -1666,6 +1667,53 @@ malloc_conf = "xmalloc:true";]]> testing this behavior. + + + opt.debug_double_free_max_scan + (unsigned) + r- + [] + + Maximum number of cached pointers to scan in the + thread cache when checking for double-free errors on deallocation. + When debug is enabled, each deallocation into the tcache scans up to + this many recently cached pointers to detect whether the same pointer + is being freed twice. Setting this to 0 disables the check. This + option is set to 0 and has no effect when debug is not enabled. The + default is 32. + + + + + opt.disable_large_size_classes + (bool) + r- + + When enabled (the default), large allocations + (i.e. allocations of size >= SC_LARGE_MINCLASS) + are rounded up to the nearest page boundary rather than the nearest + large size class. This minimizes memory overhead, especially when + using hugepages, at the cost of disabling the standard large size + class hierarchy. + + + + + opt.process_madvise_max_batch + (size_t) + r- + + Maximum number of memory regions to include in each + process_madvise + 2 batch call. When set to 0 + (the default), process_madvise is not used, and the standard + madvise + 2 is used instead. Setting this + to a positive value enables batched purging via process_madvise, which + can reduce the number of system calls needed for + purging. + + thread.arena @@ -1802,6 +1850,47 @@ malloc_conf = "xmalloc:true";]]> the developer may find manual flushing useful. + + + thread.tcache.max + (size_t) + rw + + Get or set the maximum cached size class + (tcache_max) for the calling thread's tcache. The + value is clamped to the maximum allowed limit and rounded up to the + nearest size class boundary. Changing this value will resize the + thread cache accordingly. 
+ + + + + thread.tcache.ncached_max.read_sizeclass + (size_t) + rw + + Query the maximum number of cached objects + (ncached_max) for a given size class in the calling + thread's tcache. The size class is passed in via + newp, and the corresponding + ncached_max is returned via + oldp. + + + + + thread.tcache.ncached_max.write + (char *) + -w + + Set the maximum number of cached objects + (ncached_max) for size classes in the calling + thread's tcache. The input is a string of pipe-separated settings, + where each setting specifies a size range and a count, in the same + format as the opt.tcache_ncached_max runtime + option. + + thread.prof.name @@ -1985,6 +2074,24 @@ malloc_conf = "xmalloc:true";]]> linkend="thread.arena">thread.arena. + + + arena.<i>.name + (char *) + rw + + Get or set a descriptive name for arena <i>. + Arena names can be up to 32 characters long (including the null + terminator); longer names are truncated. When reading, the caller + passes a pointer to a pre-allocated buffer (of at least 32 bytes) via + oldp, and + *oldlenp must be + sizeof(char *). + Arena names are also included in the output of malloc_stats_print(). + + + arena.<i>.dss @@ -2342,6 +2449,18 @@ struct extent_hooks_s { Page size. + + + arenas.hugepage + (size_t) + r- + + Hugepage size. This value is also reported in the + output of malloc_stats_print(). + + + arenas.tcache_max @@ -2561,6 +2680,24 @@ struct extent_hooks_s { option for additional information. + + + approximate_stats.active + (size_t) + r- + + Return the total number of bytes in active pages + collected in an unsynchronized manner, without requiring an + epoch update. + As a result, this value should NOT be compared with other + stats. For example, the relative ordering between + approximate_stats.active and stats.active or stats.resident is + not guaranteed. This interface is intended for lightweight monitoring + where an approximate value is sufficient. 
+ + stats.allocated From b8646f4db33338411b590b67f1f04e8a1eedc061 Mon Sep 17 00:00:00 2001 From: Ian Ker-Seymer Date: Fri, 10 Apr 2026 09:05:09 -0400 Subject: [PATCH 394/395] Fix `opt.max_background_threads` default in docs --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 692658f8..8bbe8120 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1137,7 +1137,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", Maximum number of background threads that will be created if background_thread is set. - Defaults to number of cpus. + Defaults to 4. From 81034ce1f1373e37dc865038e1bc8eeecf559ce8 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 13 Apr 2026 17:12:37 -0700 Subject: [PATCH 395/395] Update ChangeLog for release 5.3.1 --- ChangeLog | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/ChangeLog b/ChangeLog index 32fde562..3bc84360 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,154 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.3.1 (Apr 13, 2026) + +This release includes over 390 commits spanning bug fixes, new features, +performance optimizations, and portability improvements. Improvements of +multiple percent in system-level metrics were measured in tested production +workloads. The release has gone through large-scale production testing +at Meta. + +New features: + - Support pvalloc. (@Lapenkov: 5b1f2cc5) + - Add double free detection for the debug build. (@izaitsevfb: + 36366f3c, @guangli-dai: 42daa1ac, @divanorama: 1897f185) + - Add compile-time option `--enable-pageid` to enable memory mapping + annotation. (@devnexen: 4fc5c4fb) + - Add runtime option `prof_bt_max` to control the max stack depth for + profiling. (@guangli-dai: a0734fd6) + - Add compile-time option `--enable-force-getenv` to use `getenv` instead + of `secure_getenv`. 
(@interwq: 481bbfc9) + - Add compile-time option `--disable-dss` to disable the usage of + `sbrk(2)`. (@Svetlitski: ea5b7bea) + - Add runtime option `tcache_ncached_max` to control the number of items + in each size bin in the thread cache. (@guangli-dai: 8a22d10b) + - Add runtime option `calloc_madvise_threshold` to determine if kernel or + memset is used to zero the allocations for calloc. (@nullptr0-0: + 5081c16b) + - Add compile-time option `--disable-user-config` to disable reading the + runtime configurations from `/etc/malloc.conf` or environment variable + `MALLOC_CONF`. (@roblabla: c17bf8b3) + - Add runtime option `disable_large_size_classes` to guard the new usable + size calculation, which minimizes the memory overhead for large + allocations, i.e., >= 4 * PAGE. (@guangli-dai: c067a55c, 8347f104) + - Enable process_madvise usage, add runtime option + `process_madvise_max_batch` to control the max # of regions in each + madvise batch. (@interwq: 22440a02, @spredolac: 4246475b) + - Add mallctl interfaces: + + `opt.prof_bt_max` (@guangli-dai: a0734fd6) + + `arena..name` to set and get arena names. (@guangli-dai: ba19d2cb) + + `thread.tcache.max` to set and get the `tcache_max` of the current + thread. (@guangli-dai: a442d9b8) + + `thread.tcache.ncached_max.write` and + `thread.tcache.ncached_max.read_sizeclass` to set and get the + `ncached_max` setup of the current thread. (@guangli-dai: 630f7de9, + 6b197fdd) + + `arenas.hugepage` to return the hugepage size used, also exported to + malloc stats. (@ilvokhin: 90c627ed) + + `approximate_stats.active` to return an estimate of the current active + bytes, which should not be compared with other stats retrieved. + (@guangli-dai: 0988583d) + +Bug fixes: + - Prevent potential deadlocks in decaying during reentrancy. (@interwq: + 434a68e2) + - Fix segfault in extent coalescing. (@Svetlitski: 12311fe6) + - Add null pointer detections in mallctl calls. 
(@Svetlitski: dc0a184f, + 0288126d) + - Make mallctl `arenas.lookup` triable without crashing on invalid + pointers. (@auxten: 019cccc2, 5bac3849) + - Demote sampled allocations for proper deallocations during + `arena_reset`. (@Svetlitski: 62648c88) + - Fix jemalloc's `read(2)` and `write(2)`. (@Svetlitski: d2c9ed3d, @lexprfuncall: + 9fdc1160) + - Fix the pkg-config metadata file. (@BtbN: ed7e6fe7, ce8ce99a) + - Fix the autogen.sh so that it accepts quoted extra options. + (@honggyukim: f6fe6abd) + - Fix `rallocx()` to set errno to ENOMEM upon OOMing. (@arter97: 38056fea, + @interwq: 83b07578) + - Avoid stack overflow for internal variable array usage. (@nullptr0-0: + 47c9bcd4, 48f66cf4, @xinydev: 9169e927) + - Fix background thread initialization race. (@puzpuzpuz: 4d0ffa07) + - Guard os_page_id against a NULL address. (@lexprfuncall: 79cc7dcc) + - Handle tcache init failures gracefully. (@lexprfuncall: a056c20d) + - Fix missing release of acquired neighbor edata in + extent_try_coalesce_impl. (@spredolac: 675ab079) + - Fix memory leak of old curr_reg on san_bump_grow_locked failure. + (@spredolac: 5904a421) + - Fix large alloc nrequests under-counting on cache misses. (@spredolac: + 3cc56d32) + +Portability improvements: + - Fix the build in C99. (@abaelhe: 56ddbea2) + - Add `pthread_setaffinity_np` detection for non Linux/BSD platforms. + (@devnexen: 4c95c953) + - Make `VARIABLE_ARRAY` compatible with compilers not supporting VLA, + i.e., Visual Studio C compiler in C11 or C17 modes. (@madscientist: + be65438f) + - Fix the build on Linux using musl library. (@marv: aba1645f, 45249cf5) + - Reduce the memory overhead in small allocation sampling for systems + with larger page sizes, e.g., ARM. (@Svetlitski: 5a858c64) + - Add C23's `free_sized` and `free_aligned_sized`. (@Svetlitski: + cdb2c0e0) + - Enable heap profiling on MacOS. (@nullptr0-0: 4b555c11) + - Fix incorrect printing on 32bit. 
(@sundb: 630434bb) + - Make `JEMALLOC_CXX_THROW` compatible with C++ versions newer than + C++17. (@r-barnes, @guangli-dai: 21bcc0a8) + - Fix mmap tag conflicts on MacOS. (@kdrag0n: c893fcd1) + - Fix monotonic timer assumption for win32. (@burtonli: 8dc97b11) + - Fix VM over-reservation on systems with larger pages, e.g., aarch64. + (@interwq: cd05b19f) + - Remove `unreachable()` macro conditionally to prevent definition + conflicts for C23+. (@appujee: d8486b26, 4b88bddb) + - Fix dlsym failure observed on FreeBSD. (@rhelmot: 86bbabac) + - Change the default page size to 64KB on aarch64 Linux. (@lexprfuncall: + 9442300c) + - Update config.guess and config.sub to the latest version. + (@lexprfuncall: c51949ea) + - Determine the page size on Android from NDK header files. + (@lexprfuncall: c51abba1) + - Improve the portability of grep patterns in configure.ac. + (@lexprfuncall: 365747bc) + - Add compile-time option `--with-cxx-stdlib` to specify the C++ standard + library. (@yuxuanchen1997: a10ef3e1) + +Optimizations and refactors: + - Enable tcache for deallocation-only threads. (@interwq: 143e9c4a) + - Inline to accelerate operator delete. (@guangli-dai: e8f9f138) + - Optimize pairing heap's performance. (@deadalnix: 5266152d, be6da4f6, + 543e2d61, 10d71315, 92aa52c0, @Svetlitski: 36ca0c1b) + - Inline the storage for thread name in the profiling data. (@interwq: + ce0b7ab6, e62aa478) + - Optimize a hot function `edata_cmp_summary_comp` to accelerate it. + (@Svetlitski: 6841110b, @guangli-dai: 0181aaa4) + - Allocate thread cache using the base allocator, which enables thread + cache to use thp when `metadata_thp` is turned on. (@interwq: + 72cfdce7) + - Allow oversize arena not to purge immediately when background threads + are enabled, although the default decay time is 0 to be back compatible. + (@interwq: d1313313) + - Optimize thread-local storage implementation on Windows. 
(@mcfi: + 9e123a83, 3a0d9cda) + - Optimize fast path to allow static size class computation. (@interwq: + 323ed2e3) + - Redesign tcache GC to regulate the frequency and make it + locality-aware. The new design is default on, guarded by option + `experimental_tcache_gc`. (@nullptr0-0: 0c88be9e, e2c9f3a9, + 14d5dc13, @deadalnix: 5afff2e4) + - Reduce the arena switching overhead by avoiding forced purging when + background thread is enabled. (@interwq: a3910b98) + - Improve the reuse efficiency by limiting the maximum coalesced size for + large extents. (@jiebinn: 3c14707b) + - Refactor thread events to allow registration of users' thread events + and remove prof_threshold as the built-in event. (@spredolac: e6864c60, + 015b0179, 34ace916) + +Documentation: + - Update Windows building instructions. (@Lapenkov: 37139328) + - Add vcpkg installation instructions. (@LilyWangLL: c0c9783e) + - Update profiling internals with an example. (@jordalgo: b04e7666) + * 5.3.0 (May 6, 2022) This release contains many speed and space optimizations, from micro